qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v6 15/19] vfio-user: handle device interrupts


From: Jag Raman
Subject: Re: [PATCH v6 15/19] vfio-user: handle device interrupts
Date: Mon, 7 Mar 2022 15:10:41 +0000


> On Mar 7, 2022, at 5:24 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
> On Thu, Feb 17, 2022 at 02:49:02AM -0500, Jagannathan Raman wrote:
>> Forward remote device's interrupts to the guest
>> 
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> ---
>> include/hw/pci/pci.h              |   6 ++
>> include/hw/remote/vfio-user-obj.h |   6 ++
>> hw/pci/msi.c                      |  13 +++-
>> hw/pci/msix.c                     |  12 +++-
>> hw/remote/machine.c               |  11 +--
>> hw/remote/vfio-user-obj.c         | 107 ++++++++++++++++++++++++++++++
>> stubs/vfio-user-obj.c             |   6 ++
>> MAINTAINERS                       |   1 +
>> hw/remote/trace-events            |   1 +
>> stubs/meson.build                 |   1 +
>> 10 files changed, 158 insertions(+), 6 deletions(-)
>> create mode 100644 include/hw/remote/vfio-user-obj.h
>> create mode 100644 stubs/vfio-user-obj.c
>> 
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index c3f3c90473..d42d526a48 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -129,6 +129,8 @@ typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev,
>> typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
>>                                 pcibus_t addr, pcibus_t size, int type);
>> typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
>> +typedef void PCIMSINotify(PCIDevice *pci_dev, unsigned vector);
>> +typedef void PCIMSIxNotify(PCIDevice *pci_dev, unsigned vector);
>> 
>> typedef struct PCIIORegion {
>>     pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
>> @@ -323,6 +325,10 @@ struct PCIDevice {
>>     /* Space to store MSIX table & pending bit array */
>>     uint8_t *msix_table;
>>     uint8_t *msix_pba;
>> +
>> +    PCIMSINotify *msi_notify;
>> +    PCIMSIxNotify *msix_notify;
>> +
>>     /* MemoryRegion container for msix exclusive BAR setup */
>>     MemoryRegion msix_exclusive_bar;
>>     /* Memory Regions for MSIX table and pending bit entries. */
>> diff --git a/include/hw/remote/vfio-user-obj.h 
>> b/include/hw/remote/vfio-user-obj.h
>> new file mode 100644
>> index 0000000000..87ab78b875
>> --- /dev/null
>> +++ b/include/hw/remote/vfio-user-obj.h
>> @@ -0,0 +1,6 @@
>> +#ifndef VFIO_USER_OBJ_H
>> +#define VFIO_USER_OBJ_H
>> +
>> +void vfu_object_set_bus_irq(PCIBus *pci_bus);
>> +
>> +#endif
>> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
>> index 47d2b0f33c..93f5e400cc 100644
>> --- a/hw/pci/msi.c
>> +++ b/hw/pci/msi.c
>> @@ -51,6 +51,8 @@
>>  */
>> bool msi_nonbroken;
>> 
>> +static void pci_msi_notify(PCIDevice *dev, unsigned int vector);
>> +
>> /* If we get rid of cap allocator, we won't need this. */
>> static inline uint8_t msi_cap_sizeof(uint16_t flags)
>> {
>> @@ -225,6 +227,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
>>     dev->msi_cap = config_offset;
>>     dev->cap_present |= QEMU_PCI_CAP_MSI;
>> 
>> +    dev->msi_notify = pci_msi_notify;
> 
> Are you sure it's correct to skip the msi_is_masked() logic? I think the

pci_msi_notify() callback includes the test for msi_is_masked() - that
covers the non vfio-user case.

The vfio-user callback should add this test - will do it if we continue
this approach. Thanks for the catch!

> callback function should only override the behavior of
> msi_send_message(), not the entire msi_notify() function.

OK, this sounds like a better approach.

> 
> The same applies to MSI-X.
> 
>> +
>>     pci_set_word(dev->config + msi_flags_off(dev), flags);
>>     pci_set_word(dev->wmask + msi_flags_off(dev),
>>                  PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
>> @@ -307,7 +311,7 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int 
>> vector)
>>     return mask & (1U << vector);
>> }
>> 
>> -void msi_notify(PCIDevice *dev, unsigned int vector)
>> +static void pci_msi_notify(PCIDevice *dev, unsigned int vector)
>> {
>>     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
>>     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
>> @@ -332,6 +336,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
>>     msi_send_message(dev, msg);
>> }
>> 
>> +void msi_notify(PCIDevice *dev, unsigned int vector)
>> +{
>> +    if (dev->msi_notify) {
> 
> Can this ever be NULL?

Unlikely in the current code flow, but it could change in the future.

As a matter of principle, I thought that we should check if a function
pointer is non-NULL before invoking it in QEMU. Is that not the case?

> 
>> +        dev->msi_notify(dev, vector);
>> +    }
>> +}
>> +
>> void msi_send_message(PCIDevice *dev, MSIMessage msg)
>> {
>>     MemTxAttrs attrs = {};
>> diff --git a/hw/pci/msix.c b/hw/pci/msix.c
>> index ae9331cd0b..1c71e67f53 100644
>> --- a/hw/pci/msix.c
>> +++ b/hw/pci/msix.c
>> @@ -31,6 +31,8 @@
>> #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
>> #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
>> 
>> +static void pci_msix_notify(PCIDevice *dev, unsigned vector);
>> +
>> MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>> {
>>     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
>> @@ -334,6 +336,7 @@ int msix_init(struct PCIDevice *dev, unsigned short 
>> nentries,
>>     dev->msix_table = g_malloc0(table_size);
>>     dev->msix_pba = g_malloc0(pba_size);
>>     dev->msix_entry_used = g_malloc0(nentries * sizeof 
>> *dev->msix_entry_used);
>> +    dev->msix_notify = pci_msix_notify;
>> 
>>     msix_mask_all(dev, nentries);
>> 
>> @@ -485,7 +488,7 @@ int msix_enabled(PCIDevice *dev)
>> }
>> 
>> /* Send an MSI-X message */
>> -void msix_notify(PCIDevice *dev, unsigned vector)
>> +static void pci_msix_notify(PCIDevice *dev, unsigned vector)
>> {
>>     MSIMessage msg;
>> 
>> @@ -503,6 +506,13 @@ void msix_notify(PCIDevice *dev, unsigned vector)
>>     msi_send_message(dev, msg);
>> }
>> 
>> +void msix_notify(PCIDevice *dev, unsigned vector)
>> +{
>> +    if (dev->msix_notify) {
> 
> Can this ever be NULL?
> 
>> +        dev->msix_notify(dev, vector);
>> +    }
>> +}
>> +
>> void msix_reset(PCIDevice *dev)
>> {
>>     if (!msix_present(dev)) {
>> diff --git a/hw/remote/machine.c b/hw/remote/machine.c
>> index db4ae30710..a8b4a3aef3 100644
>> --- a/hw/remote/machine.c
>> +++ b/hw/remote/machine.c
>> @@ -23,6 +23,7 @@
>> #include "hw/remote/iohub.h"
>> #include "hw/qdev-core.h"
>> #include "hw/remote/iommu.h"
>> +#include "hw/remote/vfio-user-obj.h"
>> 
>> static void remote_machine_init(MachineState *machine)
>> {
>> @@ -54,12 +55,14 @@ static void remote_machine_init(MachineState *machine)
>> 
>>     if (s->vfio_user) {
>>         remote_configure_iommu(pci_host->bus);
>> -    }
>> 
>> -    remote_iohub_init(&s->iohub);
>> +        vfu_object_set_bus_irq(pci_host->bus);
>> +    } else {
>> +        remote_iohub_init(&s->iohub);
>> 
>> -    pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
>> -                 &s->iohub, REMOTE_IOHUB_NB_PIRQS);
>> +        pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, 
>> remote_iohub_map_irq,
>> +                     &s->iohub, REMOTE_IOHUB_NB_PIRQS);
>> +    }
>> 
>>     qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
>> }
>> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
>> index 2feabd06a4..d79bab87f1 100644
>> --- a/hw/remote/vfio-user-obj.c
>> +++ b/hw/remote/vfio-user-obj.c
>> @@ -54,6 +54,9 @@
>> #include "hw/pci/pci.h"
>> #include "qemu/timer.h"
>> #include "exec/memory.h"
>> +#include "hw/pci/msi.h"
>> +#include "hw/pci/msix.h"
>> +#include "hw/remote/vfio-user-obj.h"
>> 
>> #define TYPE_VFU_OBJECT "x-vfio-user-server"
>> OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
>> @@ -107,6 +110,10 @@ struct VfuObject {
>>     int vfu_poll_fd;
>> };
>> 
>> +static GHashTable *vfu_object_bdf_to_ctx_table;
> 
> I suggest adding a void *msi_notify_opaque field to PCIDevice and
> passing the value as an argument to ->msi_notify(). vfio-user-obj.c can
> set the value to vfu_ctx and eliminate the vfu_object_bdf_to_ctx_table
> hash table.
> 
> This simplifies the code, makes it faster, and solves race conditions
> during hot plug/unplug if other instances are running in IOThreads.

OK, will do.

> 
>> +
>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
>> +
>> static void vfu_object_init_ctx(VfuObject *o, Error **errp);
>> 
>> static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
>> @@ -463,6 +470,86 @@ static void vfu_object_register_bars(vfu_ctx_t 
>> *vfu_ctx, PCIDevice *pdev)
>>     }
>> }
>> 
>> +static void vfu_object_irq_trigger(int pci_bdf, unsigned vector)
>> +{
>> +    vfu_ctx_t *vfu_ctx = NULL;
>> +
>> +    if (!vfu_object_bdf_to_ctx_table) {
> 
> Can this ever be NULL?
> 
>> +        return;
>> +    }
>> +
>> +    vfu_ctx = g_hash_table_lookup(vfu_object_bdf_to_ctx_table,
>> +                                  INT2VOIDP(pci_bdf));
>> +
>> +    if (vfu_ctx) {
>> +        vfu_irq_trigger(vfu_ctx, vector);
>> +    }
>> +}
>> +
>> +static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
>> +{
>> +    int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
>> +                                pci_dev->devfn);
>> +
>> +    return pci_bdf;
>> +}
>> +
>> +static void vfu_object_set_irq(void *opaque, int pirq, int level)
>> +{
>> +    if (level) {
>> +        vfu_object_irq_trigger(pirq, 0);
>> +    }
>> +}
>> +
>> +static void vfu_object_msi_notify(PCIDevice *pci_dev, unsigned vector)
>> +{
>> +    int pci_bdf;
>> +
>> +    pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), 
>> pci_dev->devfn);
>> +
>> +    vfu_object_irq_trigger(pci_bdf, vector);
>> +}
>> +
>> +static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
>> +{
>> +    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
>> +    int ret, pci_bdf;
>> +
>> +    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    ret = 0;
>> +    if (msix_nr_vectors_allocated(pci_dev)) {
>> +        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
>> +                                       msix_nr_vectors_allocated(pci_dev));
>> +
>> +        pci_dev->msix_notify = vfu_object_msi_notify;
>> +    } else if (msi_nr_vectors_allocated(pci_dev)) {
>> +        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
>> +                                       msi_nr_vectors_allocated(pci_dev));
>> +
>> +        pci_dev->msi_notify = vfu_object_msi_notify;
>> +    }
>> +
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), 
>> pci_dev->devfn);
>> +
>> +    g_hash_table_insert(vfu_object_bdf_to_ctx_table, INT2VOIDP(pci_bdf),
>> +                        o->vfu_ctx);
>> +
>> +    return 0;
>> +}
>> +
>> +void vfu_object_set_bus_irq(PCIBus *pci_bus)
>> +{
>> +    pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, NULL, 1);
>> +}
>> +
>> /*
>>  * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
>>  * properties. It also depends on devices instantiated in QEMU. These
>> @@ -559,6 +646,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error 
>> **errp)
>> 
>>     vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
>> 
>> +    ret = vfu_object_setup_irqs(o, o->pci_dev);
>> +    if (ret < 0) {
>> +        error_setg(errp, "vfu: Failed to setup interrupts for %s",
>> +                   o->device);
>> +        goto fail;
>> +    }
>> +
>>     ret = vfu_realize_ctx(o->vfu_ctx);
>>     if (ret < 0) {
>>         error_setg(errp, "vfu: Failed to realize device %s- %s",
>> @@ -612,6 +706,7 @@ static void vfu_object_finalize(Object *obj)
>> {
>>     VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
>>     VfuObject *o = VFU_OBJECT(obj);
>> +    int pci_bdf;
>> 
>>     k->nr_devs--;
>> 
>> @@ -638,9 +733,17 @@ static void vfu_object_finalize(Object *obj)
>>         o->unplug_blocker = NULL;
>>     }
>> 
>> +    if (o->pci_dev) {
>> +        pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(o->pci_dev)),
>> +                                o->pci_dev->devfn);
>> +        g_hash_table_remove(vfu_object_bdf_to_ctx_table, 
>> INT2VOIDP(pci_bdf));
>> +    }
>> +
>>     o->pci_dev = NULL;
>> 
>>     if (!k->nr_devs && k->auto_shutdown) {
>> +        g_hash_table_destroy(vfu_object_bdf_to_ctx_table);
>> +        vfu_object_bdf_to_ctx_table = NULL;
>>         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
>>     }
>> 
>> @@ -658,6 +761,10 @@ static void vfu_object_class_init(ObjectClass *klass, 
>> void *data)
>> 
>>     k->auto_shutdown = true;
>> 
>> +    msi_nonbroken = true;
> 
> This should go in hw/remote/machine.c. It's a global variable related to
> the machine's interrupt controller capabilities. The value is not
> related to vfu_object_class_init(), which will be called by any QEMU
> binary that links hw/remote/vfio-user-obj.o regardless of which machine
> type is instantiated.

multiprocess QEMU, which also uses the remote machine, doesn’t support MSI and
that’s why we placed it here originally. In subsequent series, we have added
‘vfio-user’ machine sub-option to discern vfio-user and multiprocess, so this 
could be
moved to the machine initialization code as you just pointed out.

Thank you!
--
Jag

> 
>> +
>> +    vfu_object_bdf_to_ctx_table = g_hash_table_new_full(NULL, NULL, NULL, 
>> NULL);
>> +
>>     object_class_property_add(klass, "socket", "SocketAddress", NULL,
>>                               vfu_object_set_socket, NULL, NULL);
>>     object_class_property_set_description(klass, "socket",
>> diff --git a/stubs/vfio-user-obj.c b/stubs/vfio-user-obj.c
>> new file mode 100644
>> index 0000000000..79100d768e
>> --- /dev/null
>> +++ b/stubs/vfio-user-obj.c
>> @@ -0,0 +1,6 @@
>> +#include "qemu/osdep.h"
>> +#include "hw/remote/vfio-user-obj.h"
>> +
>> +void vfu_object_set_bus_irq(PCIBus *pci_bus)
>> +{
>> +}
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index f47232c78c..e274cb46af 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -3569,6 +3569,7 @@ F: hw/remote/iohub.c
>> F: include/hw/remote/iohub.h
>> F: subprojects/libvfio-user
>> F: hw/remote/vfio-user-obj.c
>> +F: include/hw/remote/vfio-user-obj.h
>> F: hw/remote/iommu.c
>> F: include/hw/remote/iommu.h
>> 
>> diff --git a/hw/remote/trace-events b/hw/remote/trace-events
>> index 847d50d88f..c167b3c7a5 100644
>> --- a/hw/remote/trace-events
>> +++ b/hw/remote/trace-events
>> @@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 
>> 0x%"PRIx64""
>> vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 
>> 0x%"PRIx64" size 0x%"PRIx64""
>> vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR 
>> address 0x%"PRIx64""
>> vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR 
>> address 0x%"PRIx64""
>> +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"
>> diff --git a/stubs/meson.build b/stubs/meson.build
>> index d359cbe1ad..c5ce979dc3 100644
>> --- a/stubs/meson.build
>> +++ b/stubs/meson.build
>> @@ -57,3 +57,4 @@ if have_system
>> else
>>   stub_ss.add(files('qdev.c'))
>> endif
>> +stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: 
>> files('vfio-user-obj.c'))
>> -- 
>> 2.20.1
>> 


reply via email to

[Prev in Thread] Current Thread [Next in Thread]