[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 11/12] vfio: register aer resume notification handle
From: |
Zhou Jie |
Subject: |
[Qemu-devel] [PATCH 11/12] vfio: register aer resume notification handler for aer resume |
Date: |
Wed, 18 May 2016 11:31:09 +0800 |
From: Chen Fan <address@hidden>
For supporting aer recovery, host and guest would run the same aer
recovery code, that would do the secondary bus reset if the error
is fatal, the aer recovery process:
1. error_detected
2. reset_link (if fatal)
3. slot_reset/mmio_enabled
4. resume
It indicates that host will do secondary bus reset to reset
the physical devices under bus in step 2, that would cause
devices in D3 status in a short time. But in qemu, we register
an error detected handler, that would be invoked as host broadcasts
the error-detected event in step 1, in order to avoid guest do
reset_link when host do reset_link simultaneously. it may cause
fatal error. we introduce a resmue notifier to assure host reset
completely.
In qemu, the aer recovery process:
1. Detect support for resume notification
If host vfio driver does not support for resume notification,
directly fail to boot up VM as with aer enabled.
2. Immediately notify the VM on error detected.
3. Stall any access to the device until resume is signaled.
4. Delay the guest directed bus reset.
5. Wait for resume notification.
If we don't get the resume notification from the host after
some timeout, we would abort the guest directed bus reset
altogether and unplug of the device to prevent it from further
interacting with the VM.
6. After get the resume notification reset bus.
Signed-off-by: Chen Fan <address@hidden>
Signed-off-by: Zhou Jie <address@hidden>
---
hw/vfio/pci.c | 182 ++++++++++++++++++++++++++++++++++++++++++++-
hw/vfio/pci.h | 5 ++
linux-headers/linux/vfio.h | 1 +
3 files changed, 186 insertions(+), 2 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6877a3d..39a9a9f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -35,6 +35,7 @@
#include "trace.h"
#define MSIX_CAP_LENGTH 12
+#define VFIO_RESET_TIMEOUT 1000
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
@@ -1937,6 +1938,14 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice
*vdev, Error **errp)
VFIOGroup *group;
int ret, i, devfn, range_limit;
+ if (!vdev->vfio_resume_cap) {
+ error_setg(errp, "vfio: Cannot enable AER for device %s,"
+ " host vfio driver does not support for"
+ " resume notification",
+ vdev->vbasedev.name);
+ return;
+ }
+
ret = vfio_get_hot_reset_info(vdev, &info);
if (ret) {
error_setg(errp, "vfio: Cannot enable AER for device %s,"
@@ -2594,6 +2603,23 @@ static int vfio_populate_device(VFIOPCIDevice *vdev)
vbasedev->name);
}
+ irq_info.index = VFIO_PCI_RESUME_IRQ_INDEX;
+
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
+ if (ret) {
+ /* This can fail for an old kernel or legacy PCI dev */
+ trace_vfio_populate_device_get_irq_info_failure();
+ ret = 0;
+ } else if (irq_info.count == 1) {
+ vdev->vfio_resume_cap = true;
+ } else {
+ error_report("vfio: %s "
+ "Could not enable error recovery for the device,"
+ " because host vfio driver does not support for"
+ " resume notification",
+ vbasedev->name);
+ }
+
error:
return ret;
}
@@ -2661,6 +2687,17 @@ static void vfio_err_notifier_handler(void *opaque)
msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
PCI_ERR_ROOT_CMD_NONFATAL_EN;
+ if (isfatal) {
+ PCIDevice *dev_0 = pci_get_function_0(dev);
+ vdev->vfio_resume_wait = true;
+ vdev->vfio_resume_arrived = false;
+ vfio_mmap_set_enabled(vdev, false);
+ if (dev_0 != dev) {
+ VFIOPCIDevice *vdev_0 = DO_UPCAST(VFIOPCIDevice, pdev, dev_0);
+ vdev_0->vfio_resume_wait = true;
+ vdev_0->vfio_resume_arrived = false;
+ }
+ }
pcie_aer_msg(dev, &msg);
return;
}
@@ -2756,6 +2793,96 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice
*vdev)
event_notifier_cleanup(&vdev->err_notifier);
}
+static void vfio_resume_notifier_handler(void *opaque)
+{
+ VFIOPCIDevice *vdev = opaque;
+
+ if (!event_notifier_test_and_clear(&vdev->resume_notifier)) {
+ return;
+ }
+
+ vdev->vfio_resume_arrived = true;
+ if (vdev->reset_timer != NULL) {
+ timer_mod(vdev->reset_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
+ }
+}
+
+static void vfio_register_aer_resume_notifier(VFIOPCIDevice *vdev)
+{
+ int ret;
+ int argsz;
+ struct vfio_irq_set *irq_set;
+ int32_t *pfd;
+
+ if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) ||
+ !vdev->vfio_resume_cap) {
+ return;
+ }
+
+ if (event_notifier_init(&vdev->resume_notifier, 0)) {
+ error_report("vfio: Unable to init event notifier for"
+ " resume notification");
+ vdev->vfio_resume_cap = false;
+ return;
+ }
+
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_RESUME_IRQ_INDEX;
+ irq_set->start = 0;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
+
+ *pfd = event_notifier_get_fd(&vdev->resume_notifier);
+ qemu_set_fd_handler(*pfd, vfio_resume_notifier_handler, NULL, vdev);
+
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ if (ret) {
+ error_report("vfio: Failed to set up resume notification");
+ qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+ event_notifier_cleanup(&vdev->resume_notifier);
+ vdev->vfio_resume_cap = false;
+ }
+ g_free(irq_set);
+}
+
+static void vfio_unregister_aer_resume_notifier(VFIOPCIDevice *vdev)
+{
+ int argsz;
+ struct vfio_irq_set *irq_set;
+ int32_t *pfd;
+ int ret;
+
+ if (!vdev->vfio_resume_cap) {
+ return;
+ }
+
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_RESUME_IRQ_INDEX;
+ irq_set->start = 0;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
+ *pfd = -1;
+
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ if (ret) {
+ error_report("vfio: Failed to de-assign error fd: %m");
+ }
+ g_free(irq_set);
+ qemu_set_fd_handler(event_notifier_get_fd(&vdev->resume_notifier),
+ NULL, NULL, vdev);
+ event_notifier_cleanup(&vdev->resume_notifier);
+}
+
static void vfio_req_notifier_handler(void *opaque)
{
VFIOPCIDevice *vdev = opaque;
@@ -3061,6 +3188,7 @@ static int vfio_initfn(PCIDevice *pdev)
}
vfio_register_err_notifier(vdev);
+ vfio_register_aer_resume_notifier(vdev);
vfio_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
@@ -3091,6 +3219,7 @@ static void vfio_exitfn(PCIDevice *pdev)
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
vfio_unregister_req_notifier(vdev);
+ vfio_unregister_aer_resume_notifier(vdev);
vfio_unregister_err_notifier(vdev);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
vfio_disable_interrupts(vdev);
@@ -3101,6 +3230,34 @@ static void vfio_exitfn(PCIDevice *pdev)
vfio_bars_exit(vdev);
}
+static void vfio_pci_delayed_reset(void *opaque)
+{
+ VFIOPCIDevice *vdev = opaque;
+ PCIDevice *pdev = &vdev->pdev;
+
+ timer_free(vdev->reset_timer);
+ vdev->reset_timer = NULL;
+
+ if (!vdev->vfio_resume_wait) {
+ return;
+ }
+ vdev->vfio_resume_wait = false;
+
+ if (vdev->vfio_resume_arrived) {
+ vfio_mmap_set_enabled(vdev, true);
+ if (pci_get_function_0(pdev) == pdev) {
+ vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+ }
+ } else {
+ uint32_t uncor_status;
+ uncor_status = vfio_pci_read_config(pdev,
+ pdev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+ if (uncor_status & ~0UL) {
+ qdev_unplug(&vdev->pdev.qdev, NULL);
+ }
+ }
+}
+
static void vfio_pci_reset(DeviceState *dev)
{
PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
@@ -3113,13 +3270,34 @@ static void vfio_pci_reset(DeviceState *dev)
if ((pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
PCI_BRIDGE_CTL_BUS_RESET)) {
- if (pci_get_function_0(pdev) == pdev) {
- vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+ if (!vdev->vfio_resume_wait) {
+ if (pci_get_function_0(pdev) == pdev) {
+ vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+ }
+ } else {
+ if (vdev->vfio_resume_arrived) {
+ vdev->vfio_resume_wait = false;
+ vfio_mmap_set_enabled(vdev, true);
+ if (pci_get_function_0(pdev) == pdev) {
+ vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+ }
+ } else {
+ vdev->reset_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+ vfio_pci_delayed_reset, vdev);
+ timer_mod(vdev->reset_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)
+ + VFIO_RESET_TIMEOUT);
+ }
}
return;
}
}
+ if (vdev->vfio_resume_wait) {
+ vdev->vfio_resume_wait = false;
+ vfio_mmap_set_enabled(vdev, true);
+ }
+
vfio_pci_pre_reset(vdev);
if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 9fb0206..49e28d8 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -119,6 +119,7 @@ typedef struct VFIOPCIDevice {
VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
PCIHostDeviceAddress host;
EventNotifier err_notifier;
+ EventNotifier resume_notifier;
EventNotifier req_notifier;
int (*resetfn)(struct VFIOPCIDevice *);
uint32_t vendor_id;
@@ -144,6 +145,10 @@ typedef struct VFIOPCIDevice {
bool no_kvm_msi;
bool no_kvm_msix;
bool single_depend_dev;
+ bool vfio_resume_cap;
+ bool vfio_resume_wait;
+ bool vfio_resume_arrived;
+ QEMUTimer *reset_timer;
} VFIOPCIDevice;
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 759b850..01dfd5d 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -433,6 +433,7 @@ enum {
VFIO_PCI_MSIX_IRQ_INDEX,
VFIO_PCI_ERR_IRQ_INDEX,
VFIO_PCI_REQ_IRQ_INDEX,
+ VFIO_PCI_RESUME_IRQ_INDEX,
VFIO_PCI_NUM_IRQS
};
--
1.8.3.1
- [Qemu-devel] [PATCH v7 00/12] vfio-pci: pass the aer error to guest, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 05/12] vfio: refine function vfio_pci_host_match, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 02/12] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 07/12] pci: add a pci_function_is_valid callback to check function if valid, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 01/12] vfio: extract vfio_get_hot_reset_info as a single function, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 03/12] vfio: add pcie extended capability support, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 08/12] vfio: add check aer functionality for hotplug device, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 04/12] vfio: add aer support for vfio device, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 09/12] vfio: vote the function 0 to do host bus reset when aer occurred, Zhou Jie, 2016/05/17
- [Qemu-devel] [PATCH 11/12] vfio: register aer resume notification handler for aer resume,
Zhou Jie <=
[Qemu-devel] [PATCH 06/12] vfio: add check host bus reset is support or not, Zhou Jie, 2016/05/17
[Qemu-devel] [PATCH 10/12] vfio-pci: pass the aer error to guest, Zhou Jie, 2016/05/17
[Qemu-devel] [PATCH 12/12] vfio: add 'aer' property to expose aercap, Zhou Jie, 2016/05/17