qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for h


From: Alex Williamson
Subject: Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset
Date: Wed, 29 Apr 2015 11:32:30 -0600

On Wed, 2015-04-29 at 16:48 +0800, Chen Fan wrote:
> add host secondary bus reset for vfio when AER occurs, if reset failed,
> we should stop vm.
> 
> Signed-off-by: Chen Fan <address@hidden>
> ---
>  hw/vfio/pci.c | 151 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 138 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 060fb47..619daed 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice {
>      PCIHostDeviceAddress host;
>      EventNotifier err_notifier;
>      EventNotifier req_notifier;
> +
> +    Notifier sec_bus_reset_notifier;
>      uint32_t features;
>  #define VFIO_FEATURE_ENABLE_VGA_BIT 0
>  #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
> @@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, 
> int pos, uint8_t size)
>      return pos;
>  }
>  
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> +                                PCIHostDeviceAddress *host2)
> +{
> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> +            host1->slot == host2->slot && host1->function == 
> host2->function);
> +}
> +
>  static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
>  {
>      uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);
> @@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, 
> uint8_t pos)
>      return 0;
>  }
>  
> +static int vfio_aer_validate_devices(DeviceState *dev,
> +                                     void *opaque)
> +{
> +    VFIOPCIDevice *vdev;
> +    int i;
> +    bool found = false;
> +    struct vfio_pci_hot_reset_info *info = opaque;
> +    struct vfio_pci_dependent_device *devices = &info->devices[0];
> +
> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +        return 0;
> +    }
> +
> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev));
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            found = true;
> +            break;
> +        }
> +    }
> +
> +    if (!found) {
> +        error_report("vfio: Cannot reset parent bus with AER supported,"
> +                     "depends on device %s which is not contained.",
> +                      vdev->vbasedev.name);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static void vfio_pci_vm_stop(VFIOPCIDevice *vdev)
> +{
> +    error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
> +                 "Please collect any data possible and then kill the guest",
> +                 __func__, vdev->host.domain, vdev->host.bus,
> +                 vdev->host.slot, vdev->host.function);
> +
> +    vm_stop(RUN_STATE_INTERNAL_ERROR);
> +}
> +
> +static void vfio_pci_host_bus_reset(Notifier *n, void *opaque)
> +{
> +    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, 
> sec_bus_reset_notifier);
> +    PCIDevice *pdev = &vdev->pdev;
> +    int ret, i;
> +    struct vfio_pci_hot_reset_info *info;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +        return;
> +    }
> +
> +    /*
> +     * Check the affected devices by virtual bus reset are contained in
> +     * the set of groups.
> +     */
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret < 0) {
> +        goto stop_vm;
> +    }
> +
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            continue;
> +        }
> +
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            if (!vdev->has_pm_reset) {

I'm not sure how has_pm_reset has anything to do with what we're testing
here.  Copy-paste error?

> +                error_report("vfio: Cannot reset device %s with AER 
> supported,"
> +                             "depends on group %d which is not owned.",
> +                             vdev->vbasedev.name, devices[i].group_id);
> +            }
> +            ret = -EPERM;
> +            goto stop_vm;
> +        }
> +    }


The above verifies that all of the devices affected by the bus reset are
owned by the VM.

> +
> +    /* Verify that we have all the affected devices under the bus */
> +    ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL,
> +                             vfio_aer_validate_devices,
> +                             NULL, info);

And here we make sure that any vfio-pci devices affected by the bus
reset are contained in the list of affected devices.  What we're still
missing is whether there are any affected devices that are exposed to
the VM that are not covered in this bus walk.  For instance, if I assign
a multi-function device and place function 0 and 1 under separate, peer
root ports, I believe the above tests will confirm that the VM owns the
necessary groups and that the only vfio-pci device affected by the bus
reset is in the list of affected devices, but it will not verify that
the other function is not affected by the guest bus reset, but is
affected by the host bus reset.

> +    if (ret < 0) {
> +        goto stop_vm;
> +    }
> +
> +
> +    /* bus reset! */
> +    ret = vfio_pci_do_hot_reset(vdev, info);
> +    if (ret < 0) {
> +        goto stop_vm;
> +    }
> +
> +    g_free(info);
> +    return;
> +
> +stop_vm:
> +    g_free(info);
> +    vfio_pci_vm_stop(vdev);
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
>  {
>      PCIDevice *pdev = &vdev->pdev;
> @@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, 
> uint16_t size)
>                     pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
>      pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
>  
> +    vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset;
> +    pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
> +

Additionally, we're going to do a bus reset once for every vfio-pci
device subordinate to the bus being reset.  That's not very efficient.

There are still a number of problems here.  We're allowing users to
configure their VMs however they wish and enable AER, then only when
they do a bus reset (which we have no way to infer is related to an AER
event) do we check and potentially halt the VM if guest mapping of the
host topology prevents a bus reset.  That seems sort of like making
backups of your data, but never testing that you can recover from the
backup until we need the data.  I can't imagine that many users are
going to be able to get this correct and they won't know it's incorrect
until an AER event occurs.

Also, Do we need to worry about guest root complex devices?  The guest
won't be able to perform a bus reset on the guest bus in that case.  Is
AER even valid for RC devices since they don't have a parent downstream
port to signal the AER?  This seems like another case where it's more
likely that a user will create an invalid configuration than it is they
will create a valid one, but we won't know until an AER occurs with the
code structure here.

Therefore I think that if the user specifies AER, we need to verify and
enforce at that point, ie. the initfn, that a host bus reset is
possible, that a guest reset is possible, and that a guest bus reset
fully contains the VM visible host devices affected by the reset.

I'd also like to see the patches ordered such that we provide all the
infrastructure to validate and enforce the configuration restrictions to
support AER before we enable it to the guest.  The order here creates
bisect points where AER is exposed to the guest with unacceptable QEMU
handling.  Thanks,

Alex

>      return 0;
>  }
>  
> @@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_enable_intx(vdev);
>  }
>  
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> -                                PCIHostDeviceAddress *host2)
> -{
> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> -            host1->slot == host2->slot && host1->function == 
> host2->function);
> -}
> -
>  static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>  {
>      VFIOGroup *group;
> @@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque)
>       * terminate the guest to contain the error.
>       */
>  
> -    error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
> -                 "Please collect any data possible and then kill the guest",
> -                 __func__, vdev->host.domain, vdev->host.bus,
> -                 vdev->host.slot, vdev->host.function);
> -
> -    vm_stop(RUN_STATE_INTERNAL_ERROR);
> +    vfio_pci_vm_stop(vdev);
>  }
>  
>  /*






reply via email to

[Prev in Thread] Current Thread [Next in Thread]