[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH] spapr-pci: rework MSI/MSIX
From: |
Michael S. Tsirkin |
Subject: |
Re: [Qemu-ppc] [PATCH] spapr-pci: rework MSI/MSIX |
Date: |
Thu, 1 Aug 2013 12:01:27 +0300 |
On Fri, Jul 12, 2013 at 05:38:24PM +1000, Alexey Kardashevskiy wrote:
> On the sPAPR platform a guest allocates MSI/MSIX vectors via RTAS
> hypercalls which return global IRQ numbers to a guest so it only
> operates with those and never touches MSIMessage.
>
> Therefore MSIMessage handling is completely hidden in QEMU.
>
> Previously every sPAPR PCI host bridge implemented its own MSI window
> to catch msi_notify()/msix_notify() calls from QEMU devices (virtio-pci
> or vfio) and route them to the guest via qemu_pulse_irq().
> MSIMessage used to be encoded as:
> .addr - address within the PHB MSI window;
> .data - the device index on PHB plus vector number.
> The MSI MR write function translated this MSIMessage to a global IRQ
> number and called qemu_pulse_irq().
>
> However the total number of IRQs is not really big (at the moment it is
> 1024 IRQs starting from 4096) and even 16bit data field of MSIMessage
> seems to be enough to store an IRQ number there.
>
> This simplifies MSI handling in sPAPR PHB. Specifically, this does:
> 1. remove a MSI window from a PHB;
> 2. add a single memory region for all MSIs to sPAPREnvironment
> and spapr_pci_msi_init() to initialize it;
> 3. encode MSIMessage as:
> * .addr - a fixed address of SPAPR_PCI_MSI_WINDOW==0x40000000000ULL;
> * .data as an IRQ number.
> 4. change IRQ allocator to align first IRQ number in a block for MSI.
> MSI uses lower bits to specify the vector number so the first IRQ has to
> be aligned. MSIX does not need any special allocator though.
>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
Acked-by: Michael S. Tsirkin <address@hidden>
> ---
> hw/ppc/spapr.c | 29 ++++++++++++++++++---
> hw/ppc/spapr_pci.c | 61
> ++++++++++++++++++++-------------------------
> include/hw/pci-host/spapr.h | 8 +++---
> include/hw/ppc/spapr.h | 4 ++-
> 4 files changed, 60 insertions(+), 42 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 671bbd9..6b21191 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -88,6 +88,9 @@ int spapr_allocate_irq(int hint, bool lsi)
>
> if (hint) {
> irq = hint;
> + if (hint >= spapr->next_irq) {
> + spapr->next_irq = hint + 1;
> + }
> /* FIXME: we should probably check for collisions somehow */
> } else {
> irq = spapr->next_irq++;
> @@ -103,22 +106,39 @@ int spapr_allocate_irq(int hint, bool lsi)
> return irq;
> }
>
> -/* Allocate block of consequtive IRQs, returns a number of the first */
> -int spapr_allocate_irq_block(int num, bool lsi)
> +/*
> + * Allocate block of consequtive IRQs, returns a number of the first.
> + * If msi==true, aligns the first IRQ number to num.
> + */
> +int spapr_allocate_irq_block(int num, bool lsi, bool msi)
> {
> int first = -1;
> - int i;
> + int i, hint = 0;
> +
> + /*
> + * MSIMesage::data is used for storing VIRQ so
> + * it has to be aligned to num to support multiple
> + * MSI vectors. MSI-X is not affected by this.
> + * The hint is used for the first IRQ, the rest should
> + * be allocated continously.
> + */
> + if (msi) {
> + assert((num == 1) || (num == 2) || (num == 4) ||
> + (num == 8) || (num == 16) || (num == 32));
> + hint = (spapr->next_irq + num - 1) & ~(num - 1);
> + }
>
> for (i = 0; i < num; ++i) {
> int irq;
>
> - irq = spapr_allocate_irq(0, lsi);
> + irq = spapr_allocate_irq(hint, lsi);
> if (!irq) {
> return -1;
> }
>
> if (0 == i) {
> first = irq;
> + hint = 0;
> }
>
> /* If the above doesn't create a consecutive block then that's
> @@ -1127,6 +1147,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
> spapr_create_nvram(spapr);
>
> /* Set up PCI */
> + spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
> spapr_pci_rtas_init();
>
> phb = spapr_create_phb(spapr, 0);
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index dfe4d04..c8ea777 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -258,11 +258,11 @@ static int spapr_msicfg_find(sPAPRPHBState *phb,
> uint32_t config_addr,
> * This is required for msi_notify()/msix_notify() which
> * will write at the addresses via spapr_msi_write().
> */
> -static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr,
> - bool msix, unsigned req_num)
> +static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
> + unsigned first_irq, unsigned req_num)
> {
> unsigned i;
> - MSIMessage msg = { .address = addr, .data = 0 };
> + MSIMessage msg = { .address = addr, .data = first_irq };
>
> if (!msix) {
> msi_set_message(pdev, msg);
> @@ -270,8 +270,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr,
> return;
> }
>
> - for (i = 0; i < req_num; ++i) {
> - msg.address = addr | (i << 2);
> + for (i = 0; i < req_num; ++i, ++msg.data) {
> msix_set_message(pdev, i, msg);
> trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
> }
> @@ -351,7 +350,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
>
> /* There is no cached config, allocate MSIs */
> if (!phb->msi_table[ndev].nvec) {
> - irq = spapr_allocate_irq_block(req_num, false);
> + irq = spapr_allocate_irq_block(req_num, false,
> + ret_intr_type == RTAS_TYPE_MSI);
> if (irq < 0) {
> fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
> rtas_st(rets, 0, -1); /* Hardware error */
> @@ -363,8 +363,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> }
>
> /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
> - spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16),
> - ret_intr_type == RTAS_TYPE_MSIX, req_num);
> + spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type ==
> RTAS_TYPE_MSIX,
> + phb->msi_table[ndev].irq, req_num);
>
> rtas_st(rets, 0, 0);
> rtas_st(rets, 1, req_num);
> @@ -487,10 +487,7 @@ static const MemoryRegionOps spapr_io_ops = {
> static void spapr_msi_write(void *opaque, hwaddr addr,
> uint64_t data, unsigned size)
> {
> - sPAPRPHBState *phb = opaque;
> - int ndev = addr >> 16;
> - int vec = ((addr & 0xFFFF) >> 2) | data;
> - uint32_t irq = phb->msi_table[ndev].irq + vec;
> + uint32_t irq = data;
>
> trace_spapr_pci_msi_write(addr, data, irq);
>
> @@ -504,6 +501,23 @@ static const MemoryRegionOps spapr_msi_ops = {
> .endianness = DEVICE_LITTLE_ENDIAN
> };
>
> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
> +{
> + /*
> + * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
> + * we need to allocate some memory to catch those writes coming
> + * from msi_notify()/msix_notify().
> + * As MSIMessage:addr is going to be the same and MSIMessage:data
> + * is going to be a VIRQ number, 4 bytes of the MSI MR will only
> + * be used.
> + */
> + spapr->msi_win_addr = addr;
> + memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
> + "msi", getpagesize());
> + memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
> + &spapr->msiwindow);
> +}
> +
> /*
> * PHB PCI device
> */
> @@ -528,8 +542,7 @@ static int spapr_phb_init(SysBusDevice *s)
>
> if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
> || (sphb->mem_win_addr != -1)
> - || (sphb->io_win_addr != -1)
> - || (sphb->msi_win_addr != -1)) {
> + || (sphb->io_win_addr != -1)) {
> fprintf(stderr, "Either \"index\" or other parameters must"
> " be specified for PAPR PHB, not both\n");
> return -1;
> @@ -542,7 +555,6 @@ static int spapr_phb_init(SysBusDevice *s)
> + sphb->index * SPAPR_PCI_WINDOW_SPACING;
> sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
> sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
> - sphb->msi_win_addr = windows_base + SPAPR_PCI_MSI_WIN_OFF;
> }
>
> if (sphb->buid == -1) {
> @@ -565,11 +577,6 @@ static int spapr_phb_init(SysBusDevice *s)
> return -1;
> }
>
> - if (sphb->msi_win_addr == -1) {
> - fprintf(stderr, "MSI window address not specified for PHB\n");
> - return -1;
> - }
> -
> if (find_phb(spapr, sphb->buid)) {
> fprintf(stderr, "PCI host bridges must have unique BUIDs\n");
> return -1;
> @@ -609,18 +616,6 @@ static int spapr_phb_init(SysBusDevice *s)
> namebuf, SPAPR_PCI_IO_WIN_SIZE);
> memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
> &sphb->iowindow);
> -
> - /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
> - * we need to allocate some memory to catch those writes coming
> - * from msi_notify()/msix_notify() */
> - if (msi_supported) {
> - sprintf(namebuf, "%s.msi", sphb->dtbusname);
> - memory_region_init_io(&sphb->msiwindow, OBJECT(sphb),
> &spapr_msi_ops, sphb,
> - namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000);
> - memory_region_add_subregion(get_system_memory(), sphb->msi_win_addr,
> - &sphb->msiwindow);
> - }
> -
> /*
> * Selecting a busname is more complex than you'd think, due to
> * interacting constraints. If the user has specified an id
> @@ -695,7 +690,6 @@ static Property spapr_phb_properties[] = {
> DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
> DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size,
> SPAPR_PCI_IO_WIN_SIZE),
> - DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1),
> DEFINE_PROP_END_OF_LIST(),
> };
>
> @@ -737,7 +731,6 @@ static const VMStateDescription vmstate_spapr_pci = {
> VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
> VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
> VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
> - VMSTATE_UINT64_EQUAL(msi_win_addr, sPAPRPHBState),
> VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
> vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
> VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS,
> 0,
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 93f9511..970b4a9 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -43,8 +43,7 @@ typedef struct sPAPRPHBState {
>
> MemoryRegion memspace, iospace;
> hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
> - hwaddr msi_win_addr;
> - MemoryRegion memwindow, iowindow, msiwindow;
> + MemoryRegion memwindow, iowindow;
>
> uint32_t dma_liobn;
> uint64_t dma_window_start;
> @@ -73,7 +72,8 @@ typedef struct sPAPRPHBState {
> #define SPAPR_PCI_MMIO_WIN_SIZE 0x20000000
> #define SPAPR_PCI_IO_WIN_OFF 0x80000000
> #define SPAPR_PCI_IO_WIN_SIZE 0x10000
> -#define SPAPR_PCI_MSI_WIN_OFF 0x90000000
> +
> +#define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL
>
> #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
>
> @@ -88,6 +88,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> uint32_t xics_phandle,
> void *fdt);
>
> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr);
> +
> void spapr_pci_rtas_init(void);
>
> #endif /* __HW_SPAPR_PCI_H__ */
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 7aa9a3d..aed02e1 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -14,6 +14,8 @@ struct icp_state;
> typedef struct sPAPREnvironment {
> struct VIOsPAPRBus *vio_bus;
> QLIST_HEAD(, sPAPRPHBState) phbs;
> + hwaddr msi_win_addr;
> + MemoryRegion msiwindow;
> struct sPAPRNVRAM *nvram;
> struct icp_state *icp;
>
> @@ -303,7 +305,7 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu,
> target_ulong opcode,
> target_ulong *args);
>
> int spapr_allocate_irq(int hint, bool lsi);
> -int spapr_allocate_irq_block(int num, bool lsi);
> +int spapr_allocate_irq_block(int num, bool lsi, bool msi);
>
> static inline int spapr_allocate_msi(int hint)
> {
> --
> 1.8.3.2
>
- Re: [Qemu-ppc] [PATCH] spapr-pci: rework MSI/MSIX,
Michael S. Tsirkin <=