[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v6 15/19] vfio-user: handle device interrupts
From: |
Stefan Hajnoczi |
Subject: |
Re: [PATCH v6 15/19] vfio-user: handle device interrupts |
Date: |
Mon, 7 Mar 2022 10:24:17 +0000 |
On Thu, Feb 17, 2022 at 02:49:02AM -0500, Jagannathan Raman wrote:
> Forward remote device's interrupts to the guest
>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
> include/hw/pci/pci.h | 6 ++
> include/hw/remote/vfio-user-obj.h | 6 ++
> hw/pci/msi.c | 13 +++-
> hw/pci/msix.c | 12 +++-
> hw/remote/machine.c | 11 +--
> hw/remote/vfio-user-obj.c | 107 ++++++++++++++++++++++++++++++
> stubs/vfio-user-obj.c | 6 ++
> MAINTAINERS | 1 +
> hw/remote/trace-events | 1 +
> stubs/meson.build | 1 +
> 10 files changed, 158 insertions(+), 6 deletions(-)
> create mode 100644 include/hw/remote/vfio-user-obj.h
> create mode 100644 stubs/vfio-user-obj.c
>
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index c3f3c90473..d42d526a48 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -129,6 +129,8 @@ typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev,
> typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
> pcibus_t addr, pcibus_t size, int type);
> typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
> +typedef void PCIMSINotify(PCIDevice *pci_dev, unsigned vector);
> +typedef void PCIMSIxNotify(PCIDevice *pci_dev, unsigned vector);
>
> typedef struct PCIIORegion {
> pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
> @@ -323,6 +325,10 @@ struct PCIDevice {
> /* Space to store MSIX table & pending bit array */
> uint8_t *msix_table;
> uint8_t *msix_pba;
> +
> + PCIMSINotify *msi_notify;
> + PCIMSIxNotify *msix_notify;
> +
> /* MemoryRegion container for msix exclusive BAR setup */
> MemoryRegion msix_exclusive_bar;
> /* Memory Regions for MSIX table and pending bit entries. */
> diff --git a/include/hw/remote/vfio-user-obj.h
> b/include/hw/remote/vfio-user-obj.h
> new file mode 100644
> index 0000000000..87ab78b875
> --- /dev/null
> +++ b/include/hw/remote/vfio-user-obj.h
> @@ -0,0 +1,6 @@
> +#ifndef VFIO_USER_OBJ_H
> +#define VFIO_USER_OBJ_H
> +
> +void vfu_object_set_bus_irq(PCIBus *pci_bus);
> +
> +#endif
> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
> index 47d2b0f33c..93f5e400cc 100644
> --- a/hw/pci/msi.c
> +++ b/hw/pci/msi.c
> @@ -51,6 +51,8 @@
> */
> bool msi_nonbroken;
>
> +static void pci_msi_notify(PCIDevice *dev, unsigned int vector);
> +
> /* If we get rid of cap allocator, we won't need this. */
> static inline uint8_t msi_cap_sizeof(uint16_t flags)
> {
> @@ -225,6 +227,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
> dev->msi_cap = config_offset;
> dev->cap_present |= QEMU_PCI_CAP_MSI;
>
> + dev->msi_notify = pci_msi_notify;
Are you sure it's correct to skip the msi_is_masked() logic? I think the
callback function should only override the behavior of
msi_send_message(), not the entire msi_notify() function.
The same applies to MSI-X.
> +
> pci_set_word(dev->config + msi_flags_off(dev), flags);
> pci_set_word(dev->wmask + msi_flags_off(dev),
> PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
> @@ -307,7 +311,7 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int
> vector)
> return mask & (1U << vector);
> }
>
> -void msi_notify(PCIDevice *dev, unsigned int vector)
> +static void pci_msi_notify(PCIDevice *dev, unsigned int vector)
> {
> uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> @@ -332,6 +336,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
> msi_send_message(dev, msg);
> }
>
> +void msi_notify(PCIDevice *dev, unsigned int vector)
> +{
> + if (dev->msi_notify) {
Can this ever be NULL?
> + dev->msi_notify(dev, vector);
> + }
> +}
> +
> void msi_send_message(PCIDevice *dev, MSIMessage msg)
> {
> MemTxAttrs attrs = {};
> diff --git a/hw/pci/msix.c b/hw/pci/msix.c
> index ae9331cd0b..1c71e67f53 100644
> --- a/hw/pci/msix.c
> +++ b/hw/pci/msix.c
> @@ -31,6 +31,8 @@
> #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
>
> +static void pci_msix_notify(PCIDevice *dev, unsigned vector);
> +
> MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
> {
> uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
> @@ -334,6 +336,7 @@ int msix_init(struct PCIDevice *dev, unsigned short
> nentries,
> dev->msix_table = g_malloc0(table_size);
> dev->msix_pba = g_malloc0(pba_size);
> dev->msix_entry_used = g_malloc0(nentries * sizeof
> *dev->msix_entry_used);
> + dev->msix_notify = pci_msix_notify;
>
> msix_mask_all(dev, nentries);
>
> @@ -485,7 +488,7 @@ int msix_enabled(PCIDevice *dev)
> }
>
> /* Send an MSI-X message */
> -void msix_notify(PCIDevice *dev, unsigned vector)
> +static void pci_msix_notify(PCIDevice *dev, unsigned vector)
> {
> MSIMessage msg;
>
> @@ -503,6 +506,13 @@ void msix_notify(PCIDevice *dev, unsigned vector)
> msi_send_message(dev, msg);
> }
>
> +void msix_notify(PCIDevice *dev, unsigned vector)
> +{
> + if (dev->msix_notify) {
Can this ever be NULL?
> + dev->msix_notify(dev, vector);
> + }
> +}
> +
> void msix_reset(PCIDevice *dev)
> {
> if (!msix_present(dev)) {
> diff --git a/hw/remote/machine.c b/hw/remote/machine.c
> index db4ae30710..a8b4a3aef3 100644
> --- a/hw/remote/machine.c
> +++ b/hw/remote/machine.c
> @@ -23,6 +23,7 @@
> #include "hw/remote/iohub.h"
> #include "hw/qdev-core.h"
> #include "hw/remote/iommu.h"
> +#include "hw/remote/vfio-user-obj.h"
>
> static void remote_machine_init(MachineState *machine)
> {
> @@ -54,12 +55,14 @@ static void remote_machine_init(MachineState *machine)
>
> if (s->vfio_user) {
> remote_configure_iommu(pci_host->bus);
> - }
>
> - remote_iohub_init(&s->iohub);
> + vfu_object_set_bus_irq(pci_host->bus);
> + } else {
> + remote_iohub_init(&s->iohub);
>
> - pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
> - &s->iohub, REMOTE_IOHUB_NB_PIRQS);
> + pci_bus_irqs(pci_host->bus, remote_iohub_set_irq,
> remote_iohub_map_irq,
> + &s->iohub, REMOTE_IOHUB_NB_PIRQS);
> + }
>
> qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
> }
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index 2feabd06a4..d79bab87f1 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -54,6 +54,9 @@
> #include "hw/pci/pci.h"
> #include "qemu/timer.h"
> #include "exec/memory.h"
> +#include "hw/pci/msi.h"
> +#include "hw/pci/msix.h"
> +#include "hw/remote/vfio-user-obj.h"
>
> #define TYPE_VFU_OBJECT "x-vfio-user-server"
> OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
> @@ -107,6 +110,10 @@ struct VfuObject {
> int vfu_poll_fd;
> };
>
> +static GHashTable *vfu_object_bdf_to_ctx_table;
I suggest adding a void *msi_notify_opaque field to PCIDevice and
passing the value as an argument to ->msi_notify(). vfio-user-obj.c can
set the value to vfu_ctx and eliminate the vfu_object_bdf_to_ctx_table
hash table.
This simplifies the code, makes it faster, and solves race conditions
during hot plug/unplug if other instances are running in IOThreads.
> +
> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
> +
> static void vfu_object_init_ctx(VfuObject *o, Error **errp);
>
> static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
> @@ -463,6 +470,86 @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx,
> PCIDevice *pdev)
> }
> }
>
> +static void vfu_object_irq_trigger(int pci_bdf, unsigned vector)
> +{
> + vfu_ctx_t *vfu_ctx = NULL;
> +
> + if (!vfu_object_bdf_to_ctx_table) {
Can this ever be NULL?
> + return;
> + }
> +
> + vfu_ctx = g_hash_table_lookup(vfu_object_bdf_to_ctx_table,
> + INT2VOIDP(pci_bdf));
> +
> + if (vfu_ctx) {
> + vfu_irq_trigger(vfu_ctx, vector);
> + }
> +}
> +
> +static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
> +{
> + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
> + pci_dev->devfn);
> +
> + return pci_bdf;
> +}
> +
> +static void vfu_object_set_irq(void *opaque, int pirq, int level)
> +{
> + if (level) {
> + vfu_object_irq_trigger(pirq, 0);
> + }
> +}
> +
> +static void vfu_object_msi_notify(PCIDevice *pci_dev, unsigned vector)
> +{
> + int pci_bdf;
> +
> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
> pci_dev->devfn);
> +
> + vfu_object_irq_trigger(pci_bdf, vector);
> +}
> +
> +static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
> +{
> + vfu_ctx_t *vfu_ctx = o->vfu_ctx;
> + int ret, pci_bdf;
> +
> + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + ret = 0;
> + if (msix_nr_vectors_allocated(pci_dev)) {
> + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
> + msix_nr_vectors_allocated(pci_dev));
> +
> + pci_dev->msix_notify = vfu_object_msi_notify;
> + } else if (msi_nr_vectors_allocated(pci_dev)) {
> + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
> + msi_nr_vectors_allocated(pci_dev));
> +
> + pci_dev->msi_notify = vfu_object_msi_notify;
> + }
> +
> + if (ret < 0) {
> + return ret;
> + }
> +
> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
> pci_dev->devfn);
> +
> + g_hash_table_insert(vfu_object_bdf_to_ctx_table, INT2VOIDP(pci_bdf),
> + o->vfu_ctx);
> +
> + return 0;
> +}
> +
> +void vfu_object_set_bus_irq(PCIBus *pci_bus)
> +{
> + pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, NULL, 1);
> +}
> +
> /*
> * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
> * properties. It also depends on devices instantiated in QEMU. These
> @@ -559,6 +646,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error
> **errp)
>
> vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
>
> + ret = vfu_object_setup_irqs(o, o->pci_dev);
> + if (ret < 0) {
> + error_setg(errp, "vfu: Failed to setup interrupts for %s",
> + o->device);
> + goto fail;
> + }
> +
> ret = vfu_realize_ctx(o->vfu_ctx);
> if (ret < 0) {
> error_setg(errp, "vfu: Failed to realize device %s- %s",
> @@ -612,6 +706,7 @@ static void vfu_object_finalize(Object *obj)
> {
> VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
> VfuObject *o = VFU_OBJECT(obj);
> + int pci_bdf;
>
> k->nr_devs--;
>
> @@ -638,9 +733,17 @@ static void vfu_object_finalize(Object *obj)
> o->unplug_blocker = NULL;
> }
>
> + if (o->pci_dev) {
> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(o->pci_dev)),
> + o->pci_dev->devfn);
> + g_hash_table_remove(vfu_object_bdf_to_ctx_table, INT2VOIDP(pci_bdf));
> + }
> +
> o->pci_dev = NULL;
>
> if (!k->nr_devs && k->auto_shutdown) {
> + g_hash_table_destroy(vfu_object_bdf_to_ctx_table);
> + vfu_object_bdf_to_ctx_table = NULL;
> qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
> }
>
> @@ -658,6 +761,10 @@ static void vfu_object_class_init(ObjectClass *klass,
> void *data)
>
> k->auto_shutdown = true;
>
> + msi_nonbroken = true;
This should go in hw/remote/machine.c. It's a global variable related to
the machine's interrupt controller capabilities. The value is not
related to vfu_object_class_init(), which will be called by any QEMU
binary that links hw/remote/vfio-user-obj.o regardless of which machine
type is instantiated.
> +
> + vfu_object_bdf_to_ctx_table = g_hash_table_new_full(NULL, NULL, NULL,
> NULL);
> +
> object_class_property_add(klass, "socket", "SocketAddress", NULL,
> vfu_object_set_socket, NULL, NULL);
> object_class_property_set_description(klass, "socket",
> diff --git a/stubs/vfio-user-obj.c b/stubs/vfio-user-obj.c
> new file mode 100644
> index 0000000000..79100d768e
> --- /dev/null
> +++ b/stubs/vfio-user-obj.c
> @@ -0,0 +1,6 @@
> +#include "qemu/osdep.h"
> +#include "hw/remote/vfio-user-obj.h"
> +
> +void vfu_object_set_bus_irq(PCIBus *pci_bus)
> +{
> +}
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f47232c78c..e274cb46af 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3569,6 +3569,7 @@ F: hw/remote/iohub.c
> F: include/hw/remote/iohub.h
> F: subprojects/libvfio-user
> F: hw/remote/vfio-user-obj.c
> +F: include/hw/remote/vfio-user-obj.h
> F: hw/remote/iommu.c
> F: include/hw/remote/iommu.h
>
> diff --git a/hw/remote/trace-events b/hw/remote/trace-events
> index 847d50d88f..c167b3c7a5 100644
> --- a/hw/remote/trace-events
> +++ b/hw/remote/trace-events
> @@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA
> 0x%"PRIx64""
> vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr
> 0x%"PRIx64" size 0x%"PRIx64""
> vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR
> address 0x%"PRIx64""
> vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR
> address 0x%"PRIx64""
> +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"
> diff --git a/stubs/meson.build b/stubs/meson.build
> index d359cbe1ad..c5ce979dc3 100644
> --- a/stubs/meson.build
> +++ b/stubs/meson.build
> @@ -57,3 +57,4 @@ if have_system
> else
> stub_ss.add(files('qdev.c'))
> endif
> +stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false:
> files('vfio-user-obj.c'))
> --
> 2.20.1
>
signature.asc
Description: PGP signature
- Re: [PATCH v6 15/19] vfio-user: handle device interrupts,
Stefan Hajnoczi <=