qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v6 08/37] ppc/xive: introduce a simplified XIVE


From: Cédric Le Goater
Subject: Re: [Qemu-devel] [PATCH v6 08/37] ppc/xive: introduce a simplified XIVE presenter
Date: Fri, 7 Dec 2018 09:49:29 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.3.1

On 12/7/18 4:10 AM, David Gibson wrote:
> On Thu, Dec 06, 2018 at 12:22:22AM +0100, Cédric Le Goater wrote:
>> The last sub-engine of the XIVE architecture is the Interrupt
>> Virtualization Presentation Engine (IVPE). On HW, the IVRE and the
>> IVPE share elements, the Power Bus interface (CQ), the routing table
>> descriptors, and they can be combined in the same HW logic. We do the
>> same in QEMU and combine both engines in the XiveRouter for
>> simplicity.
>>
>> When the IVRE has completed its job of matching an event source with a
>> Notification Virtual Target (NVT) to notify, it forwards the event
>> notification to the IVPE sub-engine. The IVPE scans the thread
>> interrupt contexts of the Notification Virtual Targets (NVT)
>> dispatched on the HW processor threads and if a match is found, it
>> signals the thread. If not, the IVPE escalates the notification to
>> some other targets and records the notification in a backlog queue.
>>
>> The IVPE maintains the thread interrupt context state for each of its
>> NVTs not dispatched on HW processor threads in the Notification
>> Virtual Target table (NVTT).
>>
>> The model currently only supports single NVT notifications.
>>
>> Signed-off-by: Cédric Le Goater <address@hidden>
>> ---
>>  include/hw/ppc/xive.h      |  15 +++
>>  include/hw/ppc/xive_regs.h |  24 ++++
>>  hw/intc/xive.c             | 227 +++++++++++++++++++++++++++++++++++++
>>  3 files changed, 266 insertions(+)
>>
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index 74b547707b17..e9b06e75fc1c 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -327,6 +327,10 @@ typedef struct XiveRouterClass {
>>                     XiveEND *end);
>>      int (*write_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>>                       XiveEND *end, uint8_t word_number);
>> +    int (*get_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
>> +                   XiveNVT *nvt);
>> +    int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
>> +                     XiveNVT *nvt, uint8_t word_number);
>>  } XiveRouterClass;
>>  
>>  void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
>> @@ -337,6 +341,11 @@ int xive_router_get_end(XiveRouter *xrtr, uint8_t 
>> end_blk, uint32_t end_idx,
>>                          XiveEND *end);
>>  int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t 
>> end_idx,
>>                            XiveEND *end, uint8_t word_number);
>> +int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
>> +                        XiveNVT *nvt);
>> +int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t 
>> nvt_idx,
>> +                          XiveNVT *nvt, uint8_t word_number);
>> +
>>  
>>  /*
>>   * XIVE END ESBs
>> @@ -393,6 +402,7 @@ typedef struct XiveTCTX {
>>      qemu_irq    output;
>>  
>>      uint8_t     regs[XIVE_TM_RING_COUNT * XIVE_TM_RING_SIZE];
>> +    uint32_t    hw_cam;
> 
> I don't love having this as a separate field.  Since it also appears
> within the register space, it's kind of redundant. 

yes.

> On the other hand,
> I see that wiring up the property directly to the register space
> doesn't really work.  Not sure how to deal with that one.

We could use get/set properties for "hw-cam" to assign WORD2 of the 
physical ring and exclude it from reset, which makes some sense. The
test on the PHYS ring in xive_presenter_tctx_match() would also look 
like the other tests. I think this is better. 

On a related topic, WORD2 of the OS ring is assigned by the hypervisor. 
For the sPAPR machine, this is done when the sPAPR IRQ backend is
reset. See patch 21 in v6.
 
> 
>>  } XiveTCTX;
>>  
>>  /*
>> @@ -412,4 +422,9 @@ extern const MemoryRegionOps xive_tm_ops;
>>  
>>  void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon);
>>  
>> +static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
>> +{
>> +    return (nvt_blk << 19) | nvt_idx;
>> +}
>> +
>>  #endif /* PPC_XIVE_H */
>> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
>> index ede3d04c5eda..85557e730cd8 100644
>> --- a/include/hw/ppc/xive_regs.h
>> +++ b/include/hw/ppc/xive_regs.h
>> @@ -186,4 +186,28 @@ typedef struct XiveEND {
>>  #define GETFIELD_BE32(m, v)       GETFIELD(m, be32_to_cpu(v))
>>  #define SETFIELD_BE32(m, v, val)  cpu_to_be32(SETFIELD(m, be32_to_cpu(v), 
>> val))
>>  
>> +/* Notification Virtual Target (NVT) */
>> +typedef struct XiveNVT {
>> +        uint32_t        w0;
>> +#define NVT_W0_VALID             PPC_BIT32(0)
>> +        uint32_t        w1;
>> +        uint32_t        w2;
>> +        uint32_t        w3;
>> +        uint32_t        w4;
>> +        uint32_t        w5;
>> +        uint32_t        w6;
>> +        uint32_t        w7;
>> +        uint32_t        w8;
>> +#define NVT_W8_GRP_VALID         PPC_BIT32(0)
>> +        uint32_t        w9;
>> +        uint32_t        wa;
>> +        uint32_t        wb;
>> +        uint32_t        wc;
>> +        uint32_t        wd;
>> +        uint32_t        we;
>> +        uint32_t        wf;
>> +} XiveNVT;
>> +
>> +#define xive_nvt_is_valid(nvt)    (be32_to_cpu((nvt)->w0) & NVT_W0_VALID)
>> +
>>  #endif /* PPC_XIVE_REGS_H */
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index 80a965c14200..891542920683 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -358,6 +358,25 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor 
>> *mon)
>>      }
>>  }
>>  
>> +/* The HW CAM (23bits) is hardwired to :
>> + *
>> + *   0x000||0b1||4Bit chip number||7Bit Thread number.
>> + *
>> + * and when the block grouping extension is enabled :
>> + *
>> + *   4Bit chip number||0x001||7Bit Thread number.
>> + */
>> +static uint32_t hw_cam_line(uint8_t chip_id, uint8_t tid)
>> +{
>> +    bool block_group = false; /* TODO (PowerNV) */
>> +
>> +    if (block_group) {
>> +        return 1 << 11 | (chip_id & 0xf) << 7 | (tid & 0x7f);
>> +    } else {
>> +        return (chip_id & 0xf) << 11 | 1 << 7 | (tid & 0x7f);
>> +    }
>> +}
>> +
>>  static void xive_tctx_reset(void *dev)
>>  {
>>      XiveTCTX *tctx = XIVE_TCTX(dev);
>> @@ -388,6 +407,12 @@ static void xive_tctx_realize(DeviceState *dev, Error 
>> **errp)
>>      cpu = POWERPC_CPU(obj);
>>      tctx->cs = CPU(obj);
>>  
>> +    if (!tctx->hw_cam) {
>> +        error_setg(errp, "XIVE: HW CAM is not set for CPU %d",
>> +                   tctx->cs->cpu_index);
> 
> You could do this at realize, rather than reset, couldn't you?

Yes, but I think I will remove "hw-cam".
 
>> +        return;
>> +    }
>> +
>>      env = &cpu->env;
>>      switch (PPC_INPUT(env)) {
>>      case PPC_FLAGS_INPUT_POWER7:
>> @@ -418,11 +443,17 @@ static const VMStateDescription vmstate_xive_tctx = {
>>      },
>>  };
>>  
>> +static Property  xive_tctx_properties[] = {
>> +    DEFINE_PROP_UINT32("hw-cam", XiveTCTX, hw_cam, 0),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>>  static void xive_tctx_class_init(ObjectClass *klass, void *data)
>>  {
>>      DeviceClass *dc = DEVICE_CLASS(klass);
>>  
>>      dc->desc = "XIVE Interrupt Thread Context";
>> +    dc->props = xive_tctx_properties;
>>      dc->realize = xive_tctx_realize;
>>      dc->unrealize = xive_tctx_unrealize;
>>      dc->vmsd = &vmstate_xive_tctx;
>> @@ -978,6 +1009,194 @@ int xive_router_write_end(XiveRouter *xrtr, uint8_t 
>> end_blk, uint32_t end_idx,
>>     return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
>>  }
>>  
>> +int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
>> +                        XiveNVT *nvt)
>> +{
>> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>> +
>> +   return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
>> +}
>> +
>> +int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t 
>> nvt_idx,
>> +                        XiveNVT *nvt, uint8_t word_number)
>> +{
>> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>> +
>> +   return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
>> +}
>> +
>> +/*
>> + * The thread context register words are in big-endian format.
>> + */
>> +static int xive_presenter_tctx_match(XiveTCTX *tctx, uint8_t format,
>> +                                     uint8_t nvt_blk, uint32_t nvt_idx,
>> +                                     bool cam_ignore, uint32_t logic_serv)
>> +{
>> +    uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
>> +    uint8_t *regs;
>> +    uint32_t qw3w2;
>> +    uint32_t qw2w2;
>> +    uint32_t qw1w2;
>> +    uint32_t qw0w2;
>> +
>> +    /* TODO (PowerNV): ignore low order bits of nvt id */
>> +
>> +    regs = &tctx->regs[TM_QW3_HV_PHYS];
>> +    qw3w2 = be32_to_cpu(*((uint32_t *) &regs[TM_WORD2]));
> 
> This is one of the main places we access regs and we have to do
> horrible casting.  Would it make more sense for it to be a uint32_t
> array?  Or at least for the local *regs to be.

The register array is accessed by byte (patch 9) for the first two
words and by word for WORD2. I don't see any good solution, apart
from maybe a helper routine:

  /*
   * Return WORD2 of a thread context ring in host byte order.
   *
   * 'regs' points to the first byte of the ring inside the XiveTCTX
   * register array (a uint8_t array); the register words are stored in
   * big-endian format, hence the be32_to_cpu() conversion.
   */
  static inline uint32_t xive_tctx_word2(uint8_t *regs)
  {
      return be32_to_cpu(*((uint32_t *) &regs[TM_WORD2]));
  }

which I need for xive_tctx_ring_print() also.
 
>> +    regs = &tctx->regs[TM_QW2_HV_POOL];
>> +    qw2w2 = be32_to_cpu(*((uint32_t *) &regs[TM_WORD2]));
>> +    regs = &tctx->regs[TM_QW1_OS];
>> +    qw1w2 = be32_to_cpu(*((uint32_t *) &regs[TM_WORD2]));
>> +    regs = &tctx->regs[TM_QW0_USER];
>> +    qw0w2 = be32_to_cpu(*((uint32_t *) &regs[TM_WORD2]));
>> +
>> +    if (format == 0) {
>> +        /* F=0 & i=1: Logical server notification */
> 
> I'm guessing the i=1 is the cam_ignore==true check?  Maybe put this
> comment inside the if block to make that clearer.

yes. 

> 
>> +        if (cam_ignore == true) {
>> +            qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
>> +                          nvt_blk, nvt_idx);
>> +             return -1;
>> +        }
>> +
>> +        /* F=0 & i=0: Specific NVT notification */
>> +
>> +        /* PHYS ring */
>> +        if ((qw3w2 & TM_QW3W2_VT) &&
>> +            tctx->hw_cam == hw_cam_line(nvt_blk, nvt_idx)) {
>> +            return TM_QW3_HV_PHYS;
>> +        }
>> +
>> +        /* HV POOL ring */
>> +        if ((qw2w2 & TM_QW2W2_VP) &&
>> +            cam == GETFIELD(TM_QW2W2_POOL_CAM, qw2w2)) {
> 
> Does that need to be a GETFIELD_BE32?

The qw[0123]w2 variables have already been byteswapped. But that might
not be a good idea. In that case, should we byteswap the V[TPOU] bit
values instead? What's your opinion?

We would then get rid of the be32_to_cpu() calls above.

> 
>> +            return TM_QW2_HV_POOL;
>> +        }
>> +
>> +        /* OS ring */
>> +        if ((qw1w2 & TM_QW1W2_VO) &&
>> +            cam == GETFIELD(TM_QW1W2_OS_CAM, qw1w2)) {
> 
> And here.
> 
>> +            return TM_QW1_OS;
>> +        }
>> +    } else {
>> +        /* F=1 : User level Event-Based Branch (EBB) notification */
>> +
>> +        /* USER ring */
>> +        if  ((qw1w2 & TM_QW1W2_VO) &&
>> +             (cam == GETFIELD(TM_QW1W2_OS_CAM, qw1w2)) &&
> 
> And here.
> 
>> +             (qw0w2 & TM_QW0W2_VU) &&
>> +             (logic_serv == GETFIELD(TM_QW0W2_LOGIC_SERV, qw0w2))) {
>> +            return TM_QW0_USER;
>> +        }
>> +    }
>> +    return -1;
>> +}
>> +
>> +typedef struct XiveTCTXMatch {
>> +    XiveTCTX *tctx;
>> +    uint8_t ring;
>> +} XiveTCTXMatch;
>> +
>> +static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
>> +                                 uint8_t nvt_blk, uint32_t nvt_idx,
>> +                                 bool cam_ignore, uint8_t priority,
>> +                                 uint32_t logic_serv, XiveTCTXMatch *match)
>> +{
>> +    CPUState *cs;
>> +
>> +    /* TODO (PowerNV): handle chip_id overwrite of block field for
>> +     * hardwired CAM compares */
>> +
>> +    CPU_FOREACH(cs) {
>> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +        XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
>> +        int ring;
>> +
>> +        /*
>> +         * HW checks that the CPU is enabled in the Physical Thread
>> +         * Enable Register (PTER).
>> +         */
>> +
>> +        /*
>> +         * Check the thread context CAM lines and record matches. We
>> +         * will handle CPU exception delivery later
>> +         */
>> +        ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx,
>> +                                         cam_ignore, logic_serv);
>> +        /*
>> +         * Save the context and follow on to catch duplicates, that we
>> +         * don't support yet.
>> +         */
>> +        if (ring != -1) {
>> +            if (match->tctx) {
>> +                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a 
>> thread "
>> +                              "context NVT %x/%x\n", nvt_blk, nvt_idx);
>> +                return false;
>> +            }
>> +
>> +            match->ring = ring;
>> +            match->tctx = tctx;
>> +        }
>> +    }
>> +
>> +    if (!match->tctx) {
>> +        qemu_log_mask(LOG_UNIMP, "XIVE: NVT %x/%x is not dispatched\n",
>> +                      nvt_blk, nvt_idx);
>> +        return false;
>> +    }
>> +
>> +    return true;
>> +}
>> +
>> +/*
>> + * This is our simple Xive Presenter Engine model. It is merged in the
>> + * Router as it does not require an extra object.
>> + *
>> + * It receives notification requests sent by the IVRE to find one
>> + * matching NVT (or more) dispatched on the processor threads. In case
>> + * of a single NVT notification, the process is abreviated and the
>> + * thread is signaled if a match is found. In case of a logical server
>> + * notification (bits ignored at the end of the NVT identifier), the
>> + * IVPE and IVRE select a winning thread using different filters. This
>> + * involves 2 or 3 exchanges on the PowerBus that the model does not
>> + * support.
>> + *
>> + * The parameters represent what is sent on the PowerBus
>> + */
>> +static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
>> +                                  uint8_t nvt_blk, uint32_t nvt_idx,
>> +                                  bool cam_ignore, uint8_t priority,
>> +                                  uint32_t logic_serv)
>> +{
>> +    XiveNVT nvt;
>> +    XiveTCTXMatch match = { 0 };
> 
> IIUC that's initializing the tctx pointer field of match, so should be
> NULL, not 0 (yes, technically they're equivalent in C, but using 0 for
> a pointer is confusing).

OK. I will clarify.

> 
>> +    bool found;
>> +
>> +    /* NVT cache lookup */
>> +    if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
>> +                      nvt_blk, nvt_idx);
>> +        return;
>> +    }
>> +
>> +    if (!xive_nvt_is_valid(&nvt)) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
>> +                      nvt_blk, nvt_idx);
>> +        return;
>> +    }
>> +
>> +    found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
>> +                                 priority, logic_serv, &match);
>> +    if (found) {
>> +        return;
>> +    }
>> +
>> +    /* If no matching NVT is dispatched on a HW thread :
>> +     * - update the NVT structure if backlog is activated
>> +     * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
>> +     *   activated
>> +     */
>> +}
>> +
>>  /*
>>   * An END trigger can come from an event trigger (IPI or HW) or from
>>   * another chip. We don't model the PowerBus but the END trigger
>> @@ -1047,6 +1266,14 @@ static void xive_router_end_notify(XiveRouter *xrtr, 
>> uint8_t end_blk,
>>      /*
>>       * Follows IVPE notification
>>       */
>> +    xive_presenter_notify(xrtr, format,
>> +                          GETFIELD_BE32(END_W6_NVT_BLOCK, end.w6),
>> +                          GETFIELD_BE32(END_W6_NVT_INDEX, end.w6),
>> +                          GETFIELD_BE32(END_W7_F0_IGNORE, end.w7),
>> +                          priority,
>> +                          GETFIELD_BE32(END_W7_F1_LOG_SERVER_ID, end.w7));
>> +
>> +    /* TODO: Auto EOI. */
>>  }
>>  
>>  static void xive_router_notify(XiveNotifier *xn, uint32_t lisn)
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]