qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v6 15/19] vfio-user: handle device interrupts


From: Stefan Hajnoczi
Subject: Re: [PATCH v6 15/19] vfio-user: handle device interrupts
Date: Mon, 7 Mar 2022 10:24:17 +0000

On Thu, Feb 17, 2022 at 02:49:02AM -0500, Jagannathan Raman wrote:
> Forward remote device's interrupts to the guest
> 
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
>  include/hw/pci/pci.h              |   6 ++
>  include/hw/remote/vfio-user-obj.h |   6 ++
>  hw/pci/msi.c                      |  13 +++-
>  hw/pci/msix.c                     |  12 +++-
>  hw/remote/machine.c               |  11 +--
>  hw/remote/vfio-user-obj.c         | 107 ++++++++++++++++++++++++++++++
>  stubs/vfio-user-obj.c             |   6 ++
>  MAINTAINERS                       |   1 +
>  hw/remote/trace-events            |   1 +
>  stubs/meson.build                 |   1 +
>  10 files changed, 158 insertions(+), 6 deletions(-)
>  create mode 100644 include/hw/remote/vfio-user-obj.h
>  create mode 100644 stubs/vfio-user-obj.c
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index c3f3c90473..d42d526a48 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -129,6 +129,8 @@ typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev,
>  typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
>                                  pcibus_t addr, pcibus_t size, int type);
>  typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
> +typedef void PCIMSINotify(PCIDevice *pci_dev, unsigned vector);
> +typedef void PCIMSIxNotify(PCIDevice *pci_dev, unsigned vector);
>  
>  typedef struct PCIIORegion {
>      pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
> @@ -323,6 +325,10 @@ struct PCIDevice {
>      /* Space to store MSIX table & pending bit array */
>      uint8_t *msix_table;
>      uint8_t *msix_pba;
> +
> +    PCIMSINotify *msi_notify;
> +    PCIMSIxNotify *msix_notify;
> +
>      /* MemoryRegion container for msix exclusive BAR setup */
>      MemoryRegion msix_exclusive_bar;
>      /* Memory Regions for MSIX table and pending bit entries. */
> diff --git a/include/hw/remote/vfio-user-obj.h 
> b/include/hw/remote/vfio-user-obj.h
> new file mode 100644
> index 0000000000..87ab78b875
> --- /dev/null
> +++ b/include/hw/remote/vfio-user-obj.h
> @@ -0,0 +1,6 @@
> +#ifndef VFIO_USER_OBJ_H
> +#define VFIO_USER_OBJ_H
> +
> +void vfu_object_set_bus_irq(PCIBus *pci_bus);
> +
> +#endif
> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
> index 47d2b0f33c..93f5e400cc 100644
> --- a/hw/pci/msi.c
> +++ b/hw/pci/msi.c
> @@ -51,6 +51,8 @@
>   */
>  bool msi_nonbroken;
>  
> +static void pci_msi_notify(PCIDevice *dev, unsigned int vector);
> +
>  /* If we get rid of cap allocator, we won't need this. */
>  static inline uint8_t msi_cap_sizeof(uint16_t flags)
>  {
> @@ -225,6 +227,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
>      dev->msi_cap = config_offset;
>      dev->cap_present |= QEMU_PCI_CAP_MSI;
>  
> +    dev->msi_notify = pci_msi_notify;

Are you sure it's correct to skip the msi_is_masked() logic? I think the
callback function should only override the behavior of
msi_send_message(), not the entire msi_notify() function.

The same applies to MSI-X.

> +
>      pci_set_word(dev->config + msi_flags_off(dev), flags);
>      pci_set_word(dev->wmask + msi_flags_off(dev),
>                   PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
> @@ -307,7 +311,7 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int 
> vector)
>      return mask & (1U << vector);
>  }
>  
> -void msi_notify(PCIDevice *dev, unsigned int vector)
> +static void pci_msi_notify(PCIDevice *dev, unsigned int vector)
>  {
>      uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
>      bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> @@ -332,6 +336,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
>      msi_send_message(dev, msg);
>  }
>  
> +void msi_notify(PCIDevice *dev, unsigned int vector)
> +{
> +    if (dev->msi_notify) {

Can this ever be NULL?

> +        dev->msi_notify(dev, vector);
> +    }
> +}
> +
>  void msi_send_message(PCIDevice *dev, MSIMessage msg)
>  {
>      MemTxAttrs attrs = {};
> diff --git a/hw/pci/msix.c b/hw/pci/msix.c
> index ae9331cd0b..1c71e67f53 100644
> --- a/hw/pci/msix.c
> +++ b/hw/pci/msix.c
> @@ -31,6 +31,8 @@
>  #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
>  #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
>  
> +static void pci_msix_notify(PCIDevice *dev, unsigned vector);
> +
>  MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>  {
>      uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
> @@ -334,6 +336,7 @@ int msix_init(struct PCIDevice *dev, unsigned short 
> nentries,
>      dev->msix_table = g_malloc0(table_size);
>      dev->msix_pba = g_malloc0(pba_size);
>      dev->msix_entry_used = g_malloc0(nentries * sizeof 
> *dev->msix_entry_used);
> +    dev->msix_notify = pci_msix_notify;
>  
>      msix_mask_all(dev, nentries);
>  
> @@ -485,7 +488,7 @@ int msix_enabled(PCIDevice *dev)
>  }
>  
>  /* Send an MSI-X message */
> -void msix_notify(PCIDevice *dev, unsigned vector)
> +static void pci_msix_notify(PCIDevice *dev, unsigned vector)
>  {
>      MSIMessage msg;
>  
> @@ -503,6 +506,13 @@ void msix_notify(PCIDevice *dev, unsigned vector)
>      msi_send_message(dev, msg);
>  }
>  
> +void msix_notify(PCIDevice *dev, unsigned vector)
> +{
> +    if (dev->msix_notify) {

Can this ever be NULL?

> +        dev->msix_notify(dev, vector);
> +    }
> +}
> +
>  void msix_reset(PCIDevice *dev)
>  {
>      if (!msix_present(dev)) {
> diff --git a/hw/remote/machine.c b/hw/remote/machine.c
> index db4ae30710..a8b4a3aef3 100644
> --- a/hw/remote/machine.c
> +++ b/hw/remote/machine.c
> @@ -23,6 +23,7 @@
>  #include "hw/remote/iohub.h"
>  #include "hw/qdev-core.h"
>  #include "hw/remote/iommu.h"
> +#include "hw/remote/vfio-user-obj.h"
>  
>  static void remote_machine_init(MachineState *machine)
>  {
> @@ -54,12 +55,14 @@ static void remote_machine_init(MachineState *machine)
>  
>      if (s->vfio_user) {
>          remote_configure_iommu(pci_host->bus);
> -    }
>  
> -    remote_iohub_init(&s->iohub);
> +        vfu_object_set_bus_irq(pci_host->bus);
> +    } else {
> +        remote_iohub_init(&s->iohub);
>  
> -    pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
> -                 &s->iohub, REMOTE_IOHUB_NB_PIRQS);
> +        pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, 
> remote_iohub_map_irq,
> +                     &s->iohub, REMOTE_IOHUB_NB_PIRQS);
> +    }
>  
>      qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
>  }
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index 2feabd06a4..d79bab87f1 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -54,6 +54,9 @@
>  #include "hw/pci/pci.h"
>  #include "qemu/timer.h"
>  #include "exec/memory.h"
> +#include "hw/pci/msi.h"
> +#include "hw/pci/msix.h"
> +#include "hw/remote/vfio-user-obj.h"
>  
>  #define TYPE_VFU_OBJECT "x-vfio-user-server"
>  OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
> @@ -107,6 +110,10 @@ struct VfuObject {
>      int vfu_poll_fd;
>  };
>  
> +static GHashTable *vfu_object_bdf_to_ctx_table;

I suggest adding a void *msi_notify_opaque field to PCIDevice and
passing the value as an argument to ->msi_notify(). vfio-user-obj.c can
set the value to vfu_ctx and eliminate the vfu_object_bdf_to_ctx_table
hash table.

This simplifies the code, makes it faster, and solves race conditions
during hot plug/unplug if other instances are running in IOThreads.

> +
> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
> +
>  static void vfu_object_init_ctx(VfuObject *o, Error **errp);
>  
>  static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
> @@ -463,6 +470,86 @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, 
> PCIDevice *pdev)
>      }
>  }
>  
> +static void vfu_object_irq_trigger(int pci_bdf, unsigned vector)
> +{
> +    vfu_ctx_t *vfu_ctx = NULL;
> +
> +    if (!vfu_object_bdf_to_ctx_table) {

Can this ever be NULL?

> +        return;
> +    }
> +
> +    vfu_ctx = g_hash_table_lookup(vfu_object_bdf_to_ctx_table,
> +                                  INT2VOIDP(pci_bdf));
> +
> +    if (vfu_ctx) {
> +        vfu_irq_trigger(vfu_ctx, vector);
> +    }
> +}
> +
> +static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
> +{
> +    int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
> +                                pci_dev->devfn);
> +
> +    return pci_bdf;
> +}
> +
> +static void vfu_object_set_irq(void *opaque, int pirq, int level)
> +{
> +    if (level) {
> +        vfu_object_irq_trigger(pirq, 0);
> +    }
> +}
> +
> +static void vfu_object_msi_notify(PCIDevice *pci_dev, unsigned vector)
> +{
> +    int pci_bdf;
> +
> +    pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), 
> pci_dev->devfn);
> +
> +    vfu_object_irq_trigger(pci_bdf, vector);
> +}
> +
> +static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
> +{
> +    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
> +    int ret, pci_bdf;
> +
> +    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    ret = 0;
> +    if (msix_nr_vectors_allocated(pci_dev)) {
> +        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
> +                                       msix_nr_vectors_allocated(pci_dev));
> +
> +        pci_dev->msix_notify = vfu_object_msi_notify;
> +    } else if (msi_nr_vectors_allocated(pci_dev)) {
> +        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
> +                                       msi_nr_vectors_allocated(pci_dev));
> +
> +        pci_dev->msi_notify = vfu_object_msi_notify;
> +    }
> +
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), 
> pci_dev->devfn);
> +
> +    g_hash_table_insert(vfu_object_bdf_to_ctx_table, INT2VOIDP(pci_bdf),
> +                        o->vfu_ctx);
> +
> +    return 0;
> +}
> +
> +void vfu_object_set_bus_irq(PCIBus *pci_bus)
> +{
> +    pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, NULL, 1);
> +}
> +
>  /*
>   * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
>   * properties. It also depends on devices instantiated in QEMU. These
> @@ -559,6 +646,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error 
> **errp)
>  
>      vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
>  
> +    ret = vfu_object_setup_irqs(o, o->pci_dev);
> +    if (ret < 0) {
> +        error_setg(errp, "vfu: Failed to setup interrupts for %s",
> +                   o->device);
> +        goto fail;
> +    }
> +
>      ret = vfu_realize_ctx(o->vfu_ctx);
>      if (ret < 0) {
>          error_setg(errp, "vfu: Failed to realize device %s- %s",
> @@ -612,6 +706,7 @@ static void vfu_object_finalize(Object *obj)
>  {
>      VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
>      VfuObject *o = VFU_OBJECT(obj);
> +    int pci_bdf;
>  
>      k->nr_devs--;
>  
> @@ -638,9 +733,17 @@ static void vfu_object_finalize(Object *obj)
>          o->unplug_blocker = NULL;
>      }
>  
> +    if (o->pci_dev) {
> +        pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(o->pci_dev)),
> +                                o->pci_dev->devfn);
> +        g_hash_table_remove(vfu_object_bdf_to_ctx_table, INT2VOIDP(pci_bdf));
> +    }
> +
>      o->pci_dev = NULL;
>  
>      if (!k->nr_devs && k->auto_shutdown) {
> +        g_hash_table_destroy(vfu_object_bdf_to_ctx_table);
> +        vfu_object_bdf_to_ctx_table = NULL;
>          qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
>      }
>  
> @@ -658,6 +761,10 @@ static void vfu_object_class_init(ObjectClass *klass, 
> void *data)
>  
>      k->auto_shutdown = true;
>  
> +    msi_nonbroken = true;

This should go in hw/remote/machine.c. It's a global variable related to
the machine's interrupt controller capabilities. The value is not
related to vfu_object_class_init(), which will be called by any QEMU
binary that links hw/remote/vfio-user-obj.o regardless of which machine
type is instantiated.

> +
> +    vfu_object_bdf_to_ctx_table = g_hash_table_new_full(NULL, NULL, NULL, 
> NULL);
> +
>      object_class_property_add(klass, "socket", "SocketAddress", NULL,
>                                vfu_object_set_socket, NULL, NULL);
>      object_class_property_set_description(klass, "socket",
> diff --git a/stubs/vfio-user-obj.c b/stubs/vfio-user-obj.c
> new file mode 100644
> index 0000000000..79100d768e
> --- /dev/null
> +++ b/stubs/vfio-user-obj.c
> @@ -0,0 +1,6 @@
> +#include "qemu/osdep.h"
> +#include "hw/remote/vfio-user-obj.h"
> +
> +void vfu_object_set_bus_irq(PCIBus *pci_bus)
> +{
> +}
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f47232c78c..e274cb46af 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3569,6 +3569,7 @@ F: hw/remote/iohub.c
>  F: include/hw/remote/iohub.h
>  F: subprojects/libvfio-user
>  F: hw/remote/vfio-user-obj.c
> +F: include/hw/remote/vfio-user-obj.h
>  F: hw/remote/iommu.c
>  F: include/hw/remote/iommu.h
>  
> diff --git a/hw/remote/trace-events b/hw/remote/trace-events
> index 847d50d88f..c167b3c7a5 100644
> --- a/hw/remote/trace-events
> +++ b/hw/remote/trace-events
> @@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 
> 0x%"PRIx64""
>  vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 
> 0x%"PRIx64" size 0x%"PRIx64""
>  vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR 
> address 0x%"PRIx64""
>  vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR 
> address 0x%"PRIx64""
> +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"
> diff --git a/stubs/meson.build b/stubs/meson.build
> index d359cbe1ad..c5ce979dc3 100644
> --- a/stubs/meson.build
> +++ b/stubs/meson.build
> @@ -57,3 +57,4 @@ if have_system
>  else
>    stub_ss.add(files('qdev.c'))
>  endif
> +stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: 
> files('vfio-user-obj.c'))
> -- 
> 2.20.1
> 

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]