qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 4/4] spapr: introduce a new IRQ backend using fi


From: Cédric Le Goater
Subject: Re: [Qemu-devel] [PATCH 4/4] spapr: introduce a new IRQ backend using fixed IRQ number ranges
Date: Mon, 28 May 2018 17:42:09 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.5.2

On 05/28/2018 05:18 PM, Greg Kurz wrote:
> On Fri, 18 May 2018 18:44:05 +0200
> Cédric Le Goater <address@hidden> wrote:
> 
>> The proposed layout of the IRQ number space is organized as follow :
>>
>>    RANGES             DEVICES
>>
>>    0x0000 - 0x0FFF    Reserved for future use (IPI = 2)
>>    0x1000 - 0x1000    1 EPOW
>>    0x1001 - 0x1001    1 HOTPLUG
>>    0x1002 - 0x10FF    unused
>>    0x1100 - 0x11FF    256 VIO devices    (1 IRQ each)
>>    0x1200 - 0x1283    32 PCI LSI devices (4 IRQs each)
>>    0x1284 - 0x13FF    unused
>>    0x1400 - 0x17FF    PCI MSI device 1   (1024 IRQs each)
>>    0x1800 - 0x1BFF    PCI MSI device 1
> 
> device 2 and...
> 
>>    0x1c00 - 0x1FFF    PCI MSI device 2
> 
> device 3 ?

ah yes :)
> 
>>
>>    0x2000    ....     not allocated. Need to increase NR_IRQS
>>
> 
> What's NR_IRQS ? 

The total number of IRQs in the backend. 

> What's the goal to have several 1k ranges ?

to support multiple PHBs with different allocatable MSI ranges,
which is not the case today.

> So that each one may be affected to a single device ?

yes.

>> The MSI range is a bit more complex to handle as the IRQS are dynamically
>> allocated by the guest OS. In consequence, we use a bitmap allocator
>> under the machine for these.
>>
>> The XICS IRQ number space is increased to 4K, which gives three MSI
>> ranges of 1K for the PHBs. The XICS source IRQ numbers still have the
>> 4K offset.
>>
> 
> Why ?

It's the legacy IRQ offset value for the sPAPR sources (2 being 
reserved for XICS IPIs) and because XIVE will use the range for 
IPIs :

    0x0000 - 0x0FFF    Reserved for future use (IPI = 2)

So not changing the value serve our purpose.

>> Signed-off-by: Cédric Le Goater <address@hidden>
>> ---
>>  include/hw/ppc/spapr.h     |   2 +
>>  include/hw/ppc/spapr_irq.h |  12 +++
>>  hw/ppc/spapr.c             |  22 +++++
>>  hw/ppc/spapr_irq.c         | 220 
>> ++++++++++++++++++++++++++++++++++++++++++++-
>>  4 files changed, 255 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 4eb212b16a51..fcc1b1c1451d 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -165,6 +165,8 @@ struct sPAPRMachineState {
>>      char *kvm_type;
>>      MemoryHotplugState hotplug_memory;
>>  
>> +    int32_t nr_irqs;
>> +    unsigned long *irq_map;
>>      const char *icp_type;
>>  
>>      bool cmd_line_caps[SPAPR_CAP_NUM];
>> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
>> index caf4c33d4cec..d1af4c4d11ba 100644
>> --- a/include/hw/ppc/spapr_irq.h
>> +++ b/include/hw/ppc/spapr_irq.h
>> @@ -22,8 +22,16 @@
>>  #define SPAPR_IRQ_PCI_MSI    0x1400  /* 1K IRQs per device covered by
>>                                        * a bitmap allocator */
>>  
>> +typedef struct sPAPRPIrqRange {
>> +    const char   *name;
>> +    uint32_t     offset;
>> +    uint32_t     width;
>> +    uint32_t     max_index;
>> +} sPAPRPIrqRange;
>> +
>>  typedef struct sPAPRIrq {
>>      uint32_t    nr_irqs;
>> +    const sPAPRPIrqRange *ranges;
>>  
>>      void (*init)(sPAPRMachineState *spapr, Error **errp);
>>      int (*alloc)(sPAPRMachineState *spapr, uint32_t range, uint32_t index,
>> @@ -35,6 +43,10 @@ typedef struct sPAPRIrq {
>>  } sPAPRIrq;
>>  
>>  extern sPAPRIrq spapr_irq_default;
>> +extern sPAPRIrq spapr_irq_2_12;
>> +
>> +const sPAPRPIrqRange *spapr_irq_get_range(sPAPRMachineState *spapr,
>> +                                          uint32_t offset);
>>  
>>  int spapr_irq_alloc(sPAPRMachineState *spapr, uint32_t range, uint32_t 
>> index,
>>                      Error **errp);
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 09f095d73eae..f2ebd6f20414 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1848,6 +1848,24 @@ static const VMStateDescription 
>> vmstate_spapr_patb_entry = {
>>      },
>>  };
>>  
>> +static bool spapr_irq_map_needed(void *opaque)
>> +{
>> +    sPAPRMachineState *spapr = opaque;
>> +
>> +    return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->nr_irqs);
>> +}
>> +
>> +static const VMStateDescription vmstate_spapr_irq_map = {
>> +    .name = "spapr_irq_map",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .needed = spapr_irq_map_needed,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, nr_irqs),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>>  static const VMStateDescription vmstate_spapr = {
>>      .name = "spapr",
>>      .version_id = 3,
>> @@ -1875,6 +1893,7 @@ static const VMStateDescription vmstate_spapr = {
>>          &vmstate_spapr_cap_cfpc,
>>          &vmstate_spapr_cap_sbbc,
>>          &vmstate_spapr_cap_ibs,
>> +        &vmstate_spapr_irq_map,
>>          NULL
>>      }
>>  };
>> @@ -3916,7 +3935,10 @@ static void 
>> spapr_machine_2_12_instance_options(MachineState *machine)
>>  
>>  static void spapr_machine_2_12_class_options(MachineClass *mc)
>>  {
>> +    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
>> +
>>      spapr_machine_2_13_class_options(mc);
>> +    smc->irq = &spapr_irq_2_12;
>>      SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_12);
>>  }
>>  
>> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
>> index ff6cb1aafd25..bfffb1467336 100644
>> --- a/hw/ppc/spapr_irq.c
>> +++ b/hw/ppc/spapr_irq.c
>> @@ -192,7 +192,7 @@ static qemu_irq spapr_qirq_2_12(sPAPRMachineState 
>> *spapr, int irq)
>>      return NULL;
>>  }
>>  
>> -sPAPRIrq spapr_irq_default = {
>> +sPAPRIrq spapr_irq_2_12 = {
>>      .nr_irqs     = XICS_IRQS_SPAPR,
>>      .init        = spapr_irq_init_2_12,
>>      .alloc       = spapr_irq_alloc_2_12,
>> @@ -201,6 +201,224 @@ sPAPRIrq spapr_irq_default = {
>>      .qirq        = spapr_qirq_2_12,
>>  };
>>  
>> +/*
>> + * IRQ range helpers for new IRQ backends
>> + */
>> +const sPAPRPIrqRange *spapr_irq_get_range(sPAPRMachineState *spapr,
>> +                                          uint32_t offset)
>> +{
>> +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>> +    const sPAPRPIrqRange *range = smc->irq->ranges;
>> +
>> +    if (range) {
>> +        while (range->name && range->offset != offset) {
>> +            range++;
>> +        }
>> +
>> +        if (!range->name) {
>> +            return NULL;
>> +        }
>> +    }
>> +
>> +    return range;
>> +}
>> +
>> +static int spapr_irq_get_base(sPAPRMachineState *spapr, uint32_t offset,
>> +                              uint32_t index, Error **errp)
>> +{
>> +    const sPAPRPIrqRange *range = spapr_irq_get_range(spapr, offset);
>> +
>> +    if (!range) {
>> +        error_setg(errp, "Invalid IRQ range %x", offset);
>> +        return -1;
>> +    }
>> +
>> +    if (index > range->max_index) {
>> +        error_setg(errp, "Index %d too big for IRQ range %s", index,
>> +                   range->name);
>> +        return -1;
>> +    }
>> +
>> +    return range->offset + index * range->width;
>> +}
>> +
>> +static int spapr_irq_range_alloc(sPAPRMachineState *spapr,
>> +                                 uint32_t range, uint32_t index, Error 
>> **errp)
>> +{
>> +    return spapr_irq_get_base(spapr, range, index, errp);
>> +}
>> +
>> +static int spapr_irq_range_alloc_msi(sPAPRMachineState *spapr, uint32_t 
>> range,
>> +                                     uint32_t index, int num, bool align,
>> +                                     Error **errp)
>> +{
>> +    int msi_base = spapr_irq_get_base(spapr, SPAPR_IRQ_PCI_MSI, index, 
>> errp);
>> +    int irq;
>> +
>> +    /*
>> +     * The 'align_mask' parameter of bitmap_find_next_zero_area()
>> +     * should be one less than a power of 2; 0 means no
>> +     * alignment. Adapt the 'align' value of the former allocator
>> +     * to fit the requirements of bitmap_find_next_zero_area()
>> +     */
>> +    align -= 1;
>> +
>> +    irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->nr_irqs,
>> +                                     msi_base, num, align);
>> +    if (irq == spapr->nr_irqs) {
>> +        error_setg(errp, "can't find a free MSI %d-IRQ block", num);
>> +        return -1;
>> +    }
>> +
>> +    bitmap_set(spapr->irq_map, irq, num);
>> +    return irq;
>> +}
>> +
>> +static void spapr_irq_range_free_msi(sPAPRMachineState *spapr, int irq, int 
>> num)
>> +{
>> +    bitmap_clear(spapr->irq_map, irq, num);
>> +}
>> +
>> +/*
>> + * New XICS IRQ backend
>> + *
>> + * using the IRQ ranges and device indexes
>> + */
>> +static void spapr_irq_init_2_13(sPAPRMachineState *spapr, Error **errp)
> 
> FYI, next QEMU version will likely be 3.0:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg04879.html

ah yes.

> Also, it is useful to add version information to the name in case of
> existing machine types. I guess _default or _new is a better choice
> here.

ok.
 
>> +{
>> +    MachineState *machine = MACHINE(spapr);
>> +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
>> +
>> +    spapr_irq_init_2_12(spapr, errp);
> 
> Hmm... it's a bit confusing. I'd rather have each init function to call a
> common helper.

well, I followed the machine class pattern.

> 
>> +
>> +    /*
>> +     * Initialize the MSI IRQ allocator. The full XICS IRQ number
>> +     * space is covered even though the bottow IRQ numbers below the
>> +     * XICS source number offset (4K) are unused and that only MSI IRQ
>> +     * numbers can be allocated. We does waste some bytes but it makes
>> +     * things easier. We will optimize later.
>> +     */
>> +    spapr->nr_irqs  = smc->irq->nr_irqs + spapr->ics->offset;
>> +    spapr->irq_map  = bitmap_new(spapr->nr_irqs);
>> +}
>> +
>> +static int spapr_irq_alloc_2_13(sPAPRMachineState *spapr,
>> +                                uint32_t range, uint32_t index, Error 
>> **errp)
>> +{
>> +    ICSState *ics = spapr->ics;
>> +    bool lsi = (range == SPAPR_IRQ_PCI_LSI);
>> +    int irq = spapr_irq_range_alloc(spapr, range, index, errp);
>> +
>> +    if (irq < 0) {
>> +        return irq;
>> +    }
>> +
>> +    /* Update the IRQState in the XICS source */
>> +    ics_set_irq_type(ics, irq - ics->offset, lsi);
>> +
>> +    return irq;
>> +}
>> +
>> +static int spapr_irq_alloc_block_2_13(sPAPRMachineState *spapr, uint32_t 
>> range,
>> +                                      uint32_t index, int num, bool align,
>> +                                      Error **errp)
>> +{
>> +    ICSState *ics = spapr->ics;
>> +    bool lsi = (range == SPAPR_IRQ_PCI_LSI);
>> +    int irq;
>> +    int i;
>> +
>> +    if (range == SPAPR_IRQ_PCI_MSI) {
>> +        irq = spapr_irq_range_alloc_msi(spapr, range, index, num, align, 
>> errp);
>> +    } else {
>> +        /* TODO: check IRQ range width vs. required block size */
>> +        irq = spapr_irq_range_alloc(spapr, range, index, errp);
>> +    }
>> +
>> +    if (irq < 0) {
>> +        return irq;
>> +    }
>> +
>> +    /* Update the IRQState in the XICS source */
>> +    for (i = irq; i < irq + num; ++i) {
>> +        ics_set_irq_type(ics, i - ics->offset, lsi);
>> +    }
>> +
>> +    return irq;
>> +}
>> +
>> +static void spapr_irq_free_2_13(sPAPRMachineState *spapr, int irq, int num,
>> +                                Error **errp)
>> +{
>> +    ICSState *ics = spapr->ics;
>> +    int msi_base = spapr_irq_get_base(spapr, SPAPR_IRQ_PCI_MSI, 0, NULL);
>> +    int i;
>> +
>> +    /* Any IRQ below MSI base should not be freed */
>> +    if (irq < msi_base) {
>> +        error_setg(errp, "IRQs %x-%x can not be freed", irq, irq + num);
>> +        return;
>> +    }
>> +
>> +    spapr_irq_range_free_msi(spapr, irq, num);
>> +
>> +    /* Clear out the IRQState from the XICS source */
>> +    for (i = irq; i < irq + num; ++i) {
>> +        if (ics_valid_irq(ics, i)) {
>> +            uint32_t srcno = i - ics->offset;
>> +            memset(&ics->irqs[srcno], 0, sizeof(ICSIRQState));
>> +        }
>> +    }
>> +}
>> +
>> +static qemu_irq spapr_qirq_2_13(sPAPRMachineState *spapr, int irq)
>> +{
>> +    return spapr_qirq_2_12(spapr, irq);
>> +}
>> +
>> +/*
>> + *   RANGES             DEVICES
>> + *
>> + *   0x0000 - 0x0FFF    Reserved (IPI = 2)
>> + *
>> + *   0x1000 - 0x1000    1 EPOW
>> + *   0x1001 - 0x1001    1 HOTPLUG
>> + *   0x1002 - 0x10FF    unused
>> + *   0x1100 - 0x11FF    256 VIO devices    (1 IRQ each)
>> + *   0x1200 - 0x1283    32 PCI LSI devices (4 IRQs each)
>> + *   0x1284 - 0x13FF    unused
>> + *   0x1400 - 0x17FF    PCI MSI device 1   (1024 IRQs each)
>> + *   0x1800 - 0x1BFF    PCI MSI device 1
>> + *   0x1c00 - 0x1FFF    PCI MSI device 2
>> + *   0x2000    ....     not allocated. Need to increase NR_IRQS
>> + */
>> +static const sPAPRPIrqRange spapr_irq_ranges_2_13[] = {
>> +    /* "IPI" Not used */
>> +    { "EPOW",       SPAPR_IRQ_EPOW,       1,               0      },
>> +    { "HOTPLUG",    SPAPR_IRQ_HOTPLUG,    1,               0      },
>> +    { "VIO",        SPAPR_IRQ_VIO,        1,               0xFF   },
>> +    { "PCI LSI",    SPAPR_IRQ_PCI_LSI,    PCI_NUM_PINS,    0x1F   },
>> +    { "PCI MSI",    SPAPR_IRQ_PCI_MSI,    0x400,           0x1F   },
>> +    { NULL,         0,                    0,               0      },
>> +};
>> +
>> +/*
>> + * Increase the XICS IRQ number space to 4K. It gives us 3 possible
>> + * MSI ranges for the PHBs. The XICS Source IRQ numbers still have the
>> + * 4K offset.
>> + */
>> +#define SPAPR_NR_IRQS_2_13    0x1000
>> +
>> +sPAPRIrq spapr_irq_default = {
>> +    .nr_irqs     = SPAPR_NR_IRQS_2_13,
> 
> Since there's only one user for this define, why not open-coding the value
> here ?

yes. I agree.

Thanks,

C.

>> +    .init        = spapr_irq_init_2_13,
>> +    .ranges      = spapr_irq_ranges_2_13,
>> +    .alloc       = spapr_irq_alloc_2_13,
>> +    .alloc_block = spapr_irq_alloc_block_2_13,
>> +    .free        = spapr_irq_free_2_13,
>> +    .qirq        = spapr_qirq_2_13,
>> +};
>> +
>>  int spapr_irq_alloc(sPAPRMachineState *spapr, uint32_t range, uint32_t 
>> index,
>>                      Error **errp)
>>  {
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]