[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH V2 09/10] Qemu/VFIO: Add SRIOV VF migration support
From: |
Michael S. Tsirkin |
Subject: |
Re: [Qemu-devel] [RFC PATCH V2 09/10] Qemu/VFIO: Add SRIOV VF migration support |
Date: |
Tue, 24 Nov 2015 23:03:29 +0200 |
On Tue, Nov 24, 2015 at 09:35:26PM +0800, Lan Tianyu wrote:
> This patch is to add SRIOV VF migration support.
> Create new device type "vfio-sriov" and add faked PCI migration capability
> to the type device.
>
> The purpose of the new capability
> 1) sync migration status with VF driver in the VM
> 2) Get mailbox irq vector to notify VF driver during migration.
> 3) Provide a way to control injecting irq or not.
>
> Qemu will migrate PCI configure space regs and MSIX config for VF.
> Inject mailbox irq at last stage of migration to notify VF about
> migration event and wait VF driver ready for migration.
I think this last bit, "wait VF driver ready for migration",
is wrong. Not a lot is gained compared to hot-unplug.
To really get a benefit from this feature, migration should
succeed even if the guest is stuck; the interrupt should then
tell the guest that it has to reset the driver.
> VF driver
> writes PCI config reg PCI_VF_MIGRATION_VF_STATUS in the new cap table
> to tell Qemu.
>
> Signed-off-by: Lan Tianyu <address@hidden>
> ---
> hw/vfio/Makefile.objs | 2 +-
> hw/vfio/pci.c | 6 ++
> hw/vfio/pci.h | 4 ++
> hw/vfio/sriov.c | 178
> ++++++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 189 insertions(+), 1 deletion(-)
> create mode 100644 hw/vfio/sriov.c
>
> diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
> index d540c9d..9cf0178 100644
> --- a/hw/vfio/Makefile.objs
> +++ b/hw/vfio/Makefile.objs
> @@ -1,6 +1,6 @@
> ifeq ($(CONFIG_LINUX), y)
> obj-$(CONFIG_SOFTMMU) += common.o
> -obj-$(CONFIG_PCI) += pci.o
> +obj-$(CONFIG_PCI) += pci.o sriov.o
> obj-$(CONFIG_SOFTMMU) += platform.o
> obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o
> endif
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 7c43fc1..e7583b5 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2013,6 +2013,11 @@ void vfio_pci_write_config(PCIDevice *pdev, uint32_t
> addr,
> } else if (was_enabled && !is_enabled) {
> vfio_disable_msix(vdev);
> }
> + } else if (vdev->migration_cap &&
> + ranges_overlap(addr, len, vdev->migration_cap, 0x10)) {
> + /* Write everything to QEMU to keep emulated bits correct */
> + pci_default_write_config(pdev, addr, val, len);
> + vfio_migration_cap_handle(pdev, addr, val, len);
> } else {
> /* Write everything to QEMU to keep emulated bits correct */
> pci_default_write_config(pdev, addr, val, len);
> @@ -3517,6 +3522,7 @@ static int vfio_initfn(PCIDevice *pdev)
> vfio_register_err_notifier(vdev);
> vfio_register_req_notifier(vdev);
> vfio_setup_resetfn(vdev);
> + vfio_add_migration_capability(vdev);
>
> return 0;
>
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 6c00575..ee6ca5e 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -134,6 +134,7 @@ typedef struct VFIOPCIDevice {
> PCIHostDeviceAddress host;
> EventNotifier err_notifier;
> EventNotifier req_notifier;
> + uint16_t migration_cap;
> int (*resetfn)(struct VFIOPCIDevice *);
> uint32_t features;
> #define VFIO_FEATURE_ENABLE_VGA_BIT 0
> @@ -162,3 +163,6 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t
> addr, int len);
> void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
> uint32_t val, int len);
> void vfio_enable_msix(VFIOPCIDevice *vdev);
> +void vfio_add_migration_capability(VFIOPCIDevice *vdev);
> +void vfio_migration_cap_handle(PCIDevice *pdev, uint32_t addr,
> + uint32_t val, int len);
> diff --git a/hw/vfio/sriov.c b/hw/vfio/sriov.c
> new file mode 100644
> index 0000000..3109538
> --- /dev/null
> +++ b/hw/vfio/sriov.c
> @@ -0,0 +1,178 @@
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <sys/io.h>
> +#include <sys/mman.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <glob.h>
> +#include <unistd.h>
> +#include <sys/ioctl.h>
> +
> +#include "hw/hw.h"
> +#include "hw/vfio/pci.h"
> +#include "hw/vfio/vfio.h"
> +#include "hw/vfio/vfio-common.h"
> +
> +#define TYPE_VFIO_SRIOV "vfio-sriov"
> +
> +#define SRIOV_LM_SETUP 0x01
> +#define SRIOV_LM_COMPLETE 0x02
> +
> +QemuEvent migration_event;
> +
> +static void vfio_dev_post_load(void *opaque)
> +{
> + struct PCIDevice *pdev = (struct PCIDevice *)opaque;
> + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> + MSIMessage msg;
> + int vector;
> +
> + if (vfio_pci_read_config(pdev,
> + vdev->migration_cap + PCI_VF_MIGRATION_CAP, 1)
> + != PCI_VF_MIGRATION_ENABLE)
> + return;
> +
> + vector = vfio_pci_read_config(pdev,
> + vdev->migration_cap + PCI_VF_MIGRATION_IRQ, 1);
> +
> + msg = msix_get_message(pdev, vector);
> + kvm_irqchip_send_msi(kvm_state, msg);
> +}
> +
> +static int vfio_dev_load(QEMUFile *f, void *opaque, int version_id)
> +{
> + struct PCIDevice *pdev = (struct PCIDevice *)opaque;
> + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> + int ret;
> +
> + if(qemu_get_byte(f)!= SRIOV_LM_COMPLETE)
> + return 0;
> +
> + ret = pci_device_load(pdev, f);
> + if (ret) {
> + error_report("Faild to load PCI config space.\n");
> + return ret;
> + }
> +
> + if (msix_enabled(pdev)) {
> + vfio_enable_msix(vdev);
> + msix_load(pdev, f);
> + }
> +
> + vfio_pci_write_config(pdev,vdev->migration_cap +
> + PCI_VF_MIGRATION_VMM_STATUS, VMM_MIGRATION_END, 1);
> + vfio_pci_write_config(pdev,vdev->migration_cap +
> + PCI_VF_MIGRATION_VF_STATUS, PCI_VF_WAIT_FOR_MIGRATION, 1);
> + return 0;
> +}
> +
> +static int vfio_dev_save_complete(QEMUFile *f, void *opaque)
> +{
> + struct PCIDevice *pdev = (struct PCIDevice *)opaque;
> +
> + qemu_put_byte(f, SRIOV_LM_COMPLETE);
> + pci_device_save(pdev, f);
> +
> + if (msix_enabled(pdev)) {
> + msix_save(pdev, f);
> + }
> +
> + return 0;
> +}
> +
> +static int vfio_dev_setup(QEMUFile *f, void *opaque)
> +{
> + qemu_put_byte(f, SRIOV_LM_SETUP);
> + return 0;
> +}
> +
> +static void vfio_dev_save_before_stop(QEMUFile *f, void *opaque)
> +{
> + struct PCIDevice *pdev = (struct PCIDevice *)opaque;
> + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> + int vector;
> + MSIMessage msg;
> +
> + vfio_pci_write_config(pdev, vdev->migration_cap +
> + PCI_VF_MIGRATION_VMM_STATUS, VMM_MIGRATION_START, 1);
> +
> + if (vfio_pci_read_config(pdev,
> + vdev->migration_cap + PCI_VF_MIGRATION_CAP, 1)
> + != PCI_VF_MIGRATION_ENABLE)
> + return;
> +
> + vector = vfio_pci_read_config(pdev,
> + vdev->migration_cap + PCI_VF_MIGRATION_IRQ, 1);
> +
> + qemu_event_reset(&migration_event);
> +
> + msg = msix_get_message(pdev, vector);
> + kvm_irqchip_send_msi(kvm_state, msg);
> +
> + qemu_event_wait(&migration_event);
So this blocks QEMU, holding the QEMU lock, and
waits for qemu_event_set below.
> +}
> +
> +static SaveVMHandlers savevm_pt_handlers = {
> + .save_live_setup = vfio_dev_setup,
> + .save_live_complete = vfio_dev_save_complete,
> + .save_before_stop = vfio_dev_save_before_stop,
> + .load_state = vfio_dev_load,
> + .post_load_state = vfio_dev_post_load,
> +};
> +
> +void vfio_add_migration_capability(VFIOPCIDevice *vdev)
> +{
> + PCIDevice *pdev = &vdev->pdev;
> + int free_pos;
> +
> + if (strcmp(object_get_typename(OBJECT(vdev)), TYPE_VFIO_SRIOV))
> + return;
> +
> + free_pos = vfio_find_free_cfg_reg(vdev,
> + pdev->config[PCI_CAPABILITY_LIST],
> + PCI_VF_MIGRATION_CAP_SIZE);
> + if (free_pos) {
> + vdev->migration_cap = free_pos;
> + pci_add_capability(pdev, PCI_CAP_ID_MIGRATION,
> + free_pos, PCI_VF_MIGRATION_CAP_SIZE);
> + memset(vdev->emulated_config_bits + free_pos, 0xff,
> + PCI_VF_MIGRATION_CAP_SIZE);
> + memset(vdev->pdev.wmask + free_pos, 0xff,
> + PCI_VF_MIGRATION_CAP_SIZE);
> + } else
> + error_report("vfio: Fail to find free PCI config space regs.\n");
> +}
> +
> +void vfio_migration_cap_handle(PCIDevice *pdev, uint32_t addr,
> + uint32_t val, int len)
> +{
> + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +
> + if (addr == vdev->migration_cap + PCI_VF_MIGRATION_VF_STATUS
> + && val == PCI_VF_READY_FOR_MIGRATION) {
> + qemu_event_set(&migration_event);
This would wake migration so it can proceed -
except this code needs the QEMU lock to run, and that
lock is held by the migration thread.
It seems unlikely that this ever worked - how
did you test this?
> + }
> +}
> +
> +static void vfio_sriov_instance_init(Object *obj)
> +{
> + PCIDevice *pdev = PCI_DEVICE(obj);
> +
> + register_savevm_live(NULL, "vfio-sriov", 1, 1,
> + &savevm_pt_handlers, pdev);
> +
> + qemu_event_init(&migration_event, false);
> +
> +}
> +
> +static const TypeInfo vfio_sriov_type_info = {
> + .name = TYPE_VFIO_SRIOV,
> + .parent = "vfio-pci",
> + .instance_init = vfio_sriov_instance_init,
> +};
> +
> +static void sriov_register_types(void)
> +{
> + type_register_static(&vfio_sriov_type_info);
> +}
> +type_init(sriov_register_types)
> --
> 1.9.3
- [Qemu-devel] [RFC PATCH V2 00/10] Qemu: Add live migration support for SRIOV NIC, Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 01/10] Qemu/VFIO: Create head file pci.h to share data struct., Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 02/10] Qemu/VFIO: Add new VFIO_GET_PCI_CAP_INFO ioctl cmd definition, Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 03/10] Qemu/VFIO: Rework vfio_std_cap_max_size() function, Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 04/10] Qemu/VFIO: Add vfio_find_free_cfg_reg() to find free PCI config space regs, Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 05/10] Qemu/VFIO: Expose PCI config space read/write and msix functions, Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 07/10] Qemu: Add post_load_state() to run after restoring CPU state, Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 06/10] Qemu/PCI: Add macros for faked PCI migration capability, Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 08/10] Qemu: Add save_before_stop callback to run just before stopping VCPU during migration, Lan Tianyu, 2015/11/24
- [Qemu-devel] [RFC PATCH V2 09/10] Qemu/VFIO: Add SRIOV VF migration support, Lan Tianyu, 2015/11/24
- Re: [Qemu-devel] [RFC PATCH V2 09/10] Qemu/VFIO: Add SRIOV VF migration support,
Michael S. Tsirkin <=
- [Qemu-devel] [RFC PATCH V2 10/10] Qemu/VFIO: Misc change for enable migration with VFIO, Lan Tianyu, 2015/11/24
- Re: [Qemu-devel] [RFC PATCH V2 00/10] Qemu: Add live migration support for SRIOV NIC, Michael S. Tsirkin, 2015/11/30