[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v6 15/19] vfio-user: handle device interrupts
From: |
Jag Raman |
Subject: |
Re: [PATCH v6 15/19] vfio-user: handle device interrupts |
Date: |
Mon, 7 Mar 2022 15:10:41 +0000 |
> On Mar 7, 2022, at 5:24 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Thu, Feb 17, 2022 at 02:49:02AM -0500, Jagannathan Raman wrote:
>> Forward remote device's interrupts to the guest
>>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> ---
>> include/hw/pci/pci.h | 6 ++
>> include/hw/remote/vfio-user-obj.h | 6 ++
>> hw/pci/msi.c | 13 +++-
>> hw/pci/msix.c | 12 +++-
>> hw/remote/machine.c | 11 +--
>> hw/remote/vfio-user-obj.c | 107 ++++++++++++++++++++++++++++++
>> stubs/vfio-user-obj.c | 6 ++
>> MAINTAINERS | 1 +
>> hw/remote/trace-events | 1 +
>> stubs/meson.build | 1 +
>> 10 files changed, 158 insertions(+), 6 deletions(-)
>> create mode 100644 include/hw/remote/vfio-user-obj.h
>> create mode 100644 stubs/vfio-user-obj.c
>>
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index c3f3c90473..d42d526a48 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -129,6 +129,8 @@ typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev,
>> typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
>> pcibus_t addr, pcibus_t size, int type);
>> typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
>> +typedef void PCIMSINotify(PCIDevice *pci_dev, unsigned vector);
>> +typedef void PCIMSIxNotify(PCIDevice *pci_dev, unsigned vector);
>>
>> typedef struct PCIIORegion {
>> pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
>> @@ -323,6 +325,10 @@ struct PCIDevice {
>> /* Space to store MSIX table & pending bit array */
>> uint8_t *msix_table;
>> uint8_t *msix_pba;
>> +
>> + PCIMSINotify *msi_notify;
>> + PCIMSIxNotify *msix_notify;
>> +
>> /* MemoryRegion container for msix exclusive BAR setup */
>> MemoryRegion msix_exclusive_bar;
>> /* Memory Regions for MSIX table and pending bit entries. */
>> diff --git a/include/hw/remote/vfio-user-obj.h
>> b/include/hw/remote/vfio-user-obj.h
>> new file mode 100644
>> index 0000000000..87ab78b875
>> --- /dev/null
>> +++ b/include/hw/remote/vfio-user-obj.h
>> @@ -0,0 +1,6 @@
>> +#ifndef VFIO_USER_OBJ_H
>> +#define VFIO_USER_OBJ_H
>> +
>> +void vfu_object_set_bus_irq(PCIBus *pci_bus);
>> +
>> +#endif
>> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
>> index 47d2b0f33c..93f5e400cc 100644
>> --- a/hw/pci/msi.c
>> +++ b/hw/pci/msi.c
>> @@ -51,6 +51,8 @@
>> */
>> bool msi_nonbroken;
>>
>> +static void pci_msi_notify(PCIDevice *dev, unsigned int vector);
>> +
>> /* If we get rid of cap allocator, we won't need this. */
>> static inline uint8_t msi_cap_sizeof(uint16_t flags)
>> {
>> @@ -225,6 +227,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
>> dev->msi_cap = config_offset;
>> dev->cap_present |= QEMU_PCI_CAP_MSI;
>>
>> + dev->msi_notify = pci_msi_notify;
>
> Are you sure it's correct to skip the msi_is_masked() logic? I think the
pci_msi_notify() callback includes the test for msi_is_masked() - that
covers the non vfio-user case.
The vfio-user callback should add this test - will do it if we continue
this approach. Thanks for the catch!
> callback function should only override the behavior of
> msi_send_message(), not the entire msi_notify() function.
OK, this sounds like a better approach.
>
> The same applies to MSI-X.
>
>> +
>> pci_set_word(dev->config + msi_flags_off(dev), flags);
>> pci_set_word(dev->wmask + msi_flags_off(dev),
>> PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
>> @@ -307,7 +311,7 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int
>> vector)
>> return mask & (1U << vector);
>> }
>>
>> -void msi_notify(PCIDevice *dev, unsigned int vector)
>> +static void pci_msi_notify(PCIDevice *dev, unsigned int vector)
>> {
>> uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
>> bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
>> @@ -332,6 +336,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
>> msi_send_message(dev, msg);
>> }
>>
>> +void msi_notify(PCIDevice *dev, unsigned int vector)
>> +{
>> + if (dev->msi_notify) {
>
> Can this ever be NULL?
Unlikely in the current code flow, but it could change in the future.
As a matter of principle, I thought that we should check if a function
pointer is non-NULL before invoking it in QEMU. Is that not the case?
>
>> + dev->msi_notify(dev, vector);
>> + }
>> +}
>> +
>> void msi_send_message(PCIDevice *dev, MSIMessage msg)
>> {
>> MemTxAttrs attrs = {};
>> diff --git a/hw/pci/msix.c b/hw/pci/msix.c
>> index ae9331cd0b..1c71e67f53 100644
>> --- a/hw/pci/msix.c
>> +++ b/hw/pci/msix.c
>> @@ -31,6 +31,8 @@
>> #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
>> #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
>>
>> +static void pci_msix_notify(PCIDevice *dev, unsigned vector);
>> +
>> MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>> {
>> uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
>> @@ -334,6 +336,7 @@ int msix_init(struct PCIDevice *dev, unsigned short
>> nentries,
>> dev->msix_table = g_malloc0(table_size);
>> dev->msix_pba = g_malloc0(pba_size);
>> dev->msix_entry_used = g_malloc0(nentries * sizeof
>> *dev->msix_entry_used);
>> + dev->msix_notify = pci_msix_notify;
>>
>> msix_mask_all(dev, nentries);
>>
>> @@ -485,7 +488,7 @@ int msix_enabled(PCIDevice *dev)
>> }
>>
>> /* Send an MSI-X message */
>> -void msix_notify(PCIDevice *dev, unsigned vector)
>> +static void pci_msix_notify(PCIDevice *dev, unsigned vector)
>> {
>> MSIMessage msg;
>>
>> @@ -503,6 +506,13 @@ void msix_notify(PCIDevice *dev, unsigned vector)
>> msi_send_message(dev, msg);
>> }
>>
>> +void msix_notify(PCIDevice *dev, unsigned vector)
>> +{
>> + if (dev->msix_notify) {
>
> Can this ever be NULL?
>
>> + dev->msix_notify(dev, vector);
>> + }
>> +}
>> +
>> void msix_reset(PCIDevice *dev)
>> {
>> if (!msix_present(dev)) {
>> diff --git a/hw/remote/machine.c b/hw/remote/machine.c
>> index db4ae30710..a8b4a3aef3 100644
>> --- a/hw/remote/machine.c
>> +++ b/hw/remote/machine.c
>> @@ -23,6 +23,7 @@
>> #include "hw/remote/iohub.h"
>> #include "hw/qdev-core.h"
>> #include "hw/remote/iommu.h"
>> +#include "hw/remote/vfio-user-obj.h"
>>
>> static void remote_machine_init(MachineState *machine)
>> {
>> @@ -54,12 +55,14 @@ static void remote_machine_init(MachineState *machine)
>>
>> if (s->vfio_user) {
>> remote_configure_iommu(pci_host->bus);
>> - }
>>
>> - remote_iohub_init(&s->iohub);
>> + vfu_object_set_bus_irq(pci_host->bus);
>> + } else {
>> + remote_iohub_init(&s->iohub);
>>
>> - pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
>> - &s->iohub, REMOTE_IOHUB_NB_PIRQS);
>> + pci_bus_irqs(pci_host->bus, remote_iohub_set_irq,
>> remote_iohub_map_irq,
>> + &s->iohub, REMOTE_IOHUB_NB_PIRQS);
>> + }
>>
>> qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
>> }
>> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
>> index 2feabd06a4..d79bab87f1 100644
>> --- a/hw/remote/vfio-user-obj.c
>> +++ b/hw/remote/vfio-user-obj.c
>> @@ -54,6 +54,9 @@
>> #include "hw/pci/pci.h"
>> #include "qemu/timer.h"
>> #include "exec/memory.h"
>> +#include "hw/pci/msi.h"
>> +#include "hw/pci/msix.h"
>> +#include "hw/remote/vfio-user-obj.h"
>>
>> #define TYPE_VFU_OBJECT "x-vfio-user-server"
>> OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
>> @@ -107,6 +110,10 @@ struct VfuObject {
>> int vfu_poll_fd;
>> };
>>
>> +static GHashTable *vfu_object_bdf_to_ctx_table;
>
> I suggest adding a void *msi_notify_opaque field to PCIDevice and
> passing the value as an argument to ->msi_notify(). vfio-user-obj.c can
> set the value to vfu_ctx and eliminate the vfu_object_bdf_to_ctx_table
> hash table.
>
> This simplifies the code, makes it faster, and solves race conditions
> during hot plug/unplug if other instances are running in IOThreads.
OK, will do.
>
>> +
>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
>> +
>> static void vfu_object_init_ctx(VfuObject *o, Error **errp);
>>
>> static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
>> @@ -463,6 +470,86 @@ static void vfu_object_register_bars(vfu_ctx_t
>> *vfu_ctx, PCIDevice *pdev)
>> }
>> }
>>
>> +static void vfu_object_irq_trigger(int pci_bdf, unsigned vector)
>> +{
>> + vfu_ctx_t *vfu_ctx = NULL;
>> +
>> + if (!vfu_object_bdf_to_ctx_table) {
>
> Can this ever be NULL?
>
>> + return;
>> + }
>> +
>> + vfu_ctx = g_hash_table_lookup(vfu_object_bdf_to_ctx_table,
>> + INT2VOIDP(pci_bdf));
>> +
>> + if (vfu_ctx) {
>> + vfu_irq_trigger(vfu_ctx, vector);
>> + }
>> +}
>> +
>> +static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
>> +{
>> + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
>> + pci_dev->devfn);
>> +
>> + return pci_bdf;
>> +}
>> +
>> +static void vfu_object_set_irq(void *opaque, int pirq, int level)
>> +{
>> + if (level) {
>> + vfu_object_irq_trigger(pirq, 0);
>> + }
>> +}
>> +
>> +static void vfu_object_msi_notify(PCIDevice *pci_dev, unsigned vector)
>> +{
>> + int pci_bdf;
>> +
>> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
>> pci_dev->devfn);
>> +
>> + vfu_object_irq_trigger(pci_bdf, vector);
>> +}
>> +
>> +static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
>> +{
>> + vfu_ctx_t *vfu_ctx = o->vfu_ctx;
>> + int ret, pci_bdf;
>> +
>> + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
>> + if (ret < 0) {
>> + return ret;
>> + }
>> +
>> + ret = 0;
>> + if (msix_nr_vectors_allocated(pci_dev)) {
>> + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
>> + msix_nr_vectors_allocated(pci_dev));
>> +
>> + pci_dev->msix_notify = vfu_object_msi_notify;
>> + } else if (msi_nr_vectors_allocated(pci_dev)) {
>> + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
>> + msi_nr_vectors_allocated(pci_dev));
>> +
>> + pci_dev->msi_notify = vfu_object_msi_notify;
>> + }
>> +
>> + if (ret < 0) {
>> + return ret;
>> + }
>> +
>> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
>> pci_dev->devfn);
>> +
>> + g_hash_table_insert(vfu_object_bdf_to_ctx_table, INT2VOIDP(pci_bdf),
>> + o->vfu_ctx);
>> +
>> + return 0;
>> +}
>> +
>> +void vfu_object_set_bus_irq(PCIBus *pci_bus)
>> +{
>> + pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, NULL, 1);
>> +}
>> +
>> /*
>> * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
>> * properties. It also depends on devices instantiated in QEMU. These
>> @@ -559,6 +646,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error
>> **errp)
>>
>> vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
>>
>> + ret = vfu_object_setup_irqs(o, o->pci_dev);
>> + if (ret < 0) {
>> + error_setg(errp, "vfu: Failed to setup interrupts for %s",
>> + o->device);
>> + goto fail;
>> + }
>> +
>> ret = vfu_realize_ctx(o->vfu_ctx);
>> if (ret < 0) {
>> error_setg(errp, "vfu: Failed to realize device %s- %s",
>> @@ -612,6 +706,7 @@ static void vfu_object_finalize(Object *obj)
>> {
>> VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
>> VfuObject *o = VFU_OBJECT(obj);
>> + int pci_bdf;
>>
>> k->nr_devs--;
>>
>> @@ -638,9 +733,17 @@ static void vfu_object_finalize(Object *obj)
>> o->unplug_blocker = NULL;
>> }
>>
>> + if (o->pci_dev) {
>> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(o->pci_dev)),
>> + o->pci_dev->devfn);
>> + g_hash_table_remove(vfu_object_bdf_to_ctx_table,
>> INT2VOIDP(pci_bdf));
>> + }
>> +
>> o->pci_dev = NULL;
>>
>> if (!k->nr_devs && k->auto_shutdown) {
>> + g_hash_table_destroy(vfu_object_bdf_to_ctx_table);
>> + vfu_object_bdf_to_ctx_table = NULL;
>> qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
>> }
>>
>> @@ -658,6 +761,10 @@ static void vfu_object_class_init(ObjectClass *klass,
>> void *data)
>>
>> k->auto_shutdown = true;
>>
>> + msi_nonbroken = true;
>
> This should go in hw/remote/machine.c. It's a global variable related to
> the machine's interrupt controller capabilities. The value is not
> related to vfu_object_class_init(), which will be called by any QEMU
> binary that links hw/remote/vfio-user-obj.o regardless of which machine
> type is instantiated.
multiprocess QEMU, which also uses the remote machine, doesn’t support MSI and
that’s why we placed it here originally. In subsequent series, we have added
‘vfio-user’ machine sub-option to discern vfio-user and multiprocess, so this
could be
moved to the machine initialization code as you just pointed out.
Thank you!
--
Jag
>
>> +
>> + vfu_object_bdf_to_ctx_table = g_hash_table_new_full(NULL, NULL, NULL,
>> NULL);
>> +
>> object_class_property_add(klass, "socket", "SocketAddress", NULL,
>> vfu_object_set_socket, NULL, NULL);
>> object_class_property_set_description(klass, "socket",
>> diff --git a/stubs/vfio-user-obj.c b/stubs/vfio-user-obj.c
>> new file mode 100644
>> index 0000000000..79100d768e
>> --- /dev/null
>> +++ b/stubs/vfio-user-obj.c
>> @@ -0,0 +1,6 @@
>> +#include "qemu/osdep.h"
>> +#include "hw/remote/vfio-user-obj.h"
>> +
>> +void vfu_object_set_bus_irq(PCIBus *pci_bus)
>> +{
>> +}
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index f47232c78c..e274cb46af 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -3569,6 +3569,7 @@ F: hw/remote/iohub.c
>> F: include/hw/remote/iohub.h
>> F: subprojects/libvfio-user
>> F: hw/remote/vfio-user-obj.c
>> +F: include/hw/remote/vfio-user-obj.h
>> F: hw/remote/iommu.c
>> F: include/hw/remote/iommu.h
>>
>> diff --git a/hw/remote/trace-events b/hw/remote/trace-events
>> index 847d50d88f..c167b3c7a5 100644
>> --- a/hw/remote/trace-events
>> +++ b/hw/remote/trace-events
>> @@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA
>> 0x%"PRIx64""
>> vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr
>> 0x%"PRIx64" size 0x%"PRIx64""
>> vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR
>> address 0x%"PRIx64""
>> vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR
>> address 0x%"PRIx64""
>> +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"
>> diff --git a/stubs/meson.build b/stubs/meson.build
>> index d359cbe1ad..c5ce979dc3 100644
>> --- a/stubs/meson.build
>> +++ b/stubs/meson.build
>> @@ -57,3 +57,4 @@ if have_system
>> else
>> stub_ss.add(files('qdev.c'))
>> endif
>> +stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false:
>> files('vfio-user-obj.c'))
>> --
>> 2.20.1
>>