qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC PATCH] pcie: Defer hot unplug until migration is complete


From: Julia Suvorova
Subject: Re: [RFC PATCH] pcie: Defer hot unplug until migration is complete
Date: Thu, 16 Jan 2020 11:47:52 +0100

On Mon, Jan 13, 2020 at 3:04 PM Yury Kotov <address@hidden> wrote:
>
> Devices hot-plug during migration is not allowed and disabled in
> corresponding QMP-commands (device_add, device_del).
> But guest still can unplug device by powering it off
> (Example: echo 0 > /sys/bus/pci/slots/XXX/power).

You don't want to unplug device due to powering the slot off in the
first place. Instead, you can hide the device (see f3a8505656), and
make it visible again when the slot is powered on. Thus, the guest
will not be able to unplug the device and your problem will disappear.

Best regards, Julia Suvorova.

> Fix it by deferring unplugging until the migration is complete.
>
> Signed-off-by: Yury Kotov <address@hidden>
> ---
>  hw/pci-bridge/gen_pcie_root_port.c |  7 ++++
>  hw/pci-bridge/ioh3420.c            |  7 ++++
>  hw/pci-bridge/xio3130_downstream.c |  7 ++++
>  hw/pci/pcie.c                      | 54 +++++++++++++++++++++++-------
>  hw/pci/pcie_port.c                 | 47 ++++++++++++++++++++++++++
>  include/hw/pci/pcie.h              |  1 +
>  include/hw/pci/pcie_port.h         | 20 +++++++++++
>  7 files changed, 130 insertions(+), 13 deletions(-)
>
> diff --git a/hw/pci-bridge/gen_pcie_root_port.c 
> b/hw/pci-bridge/gen_pcie_root_port.c
> index 9eaefebca8..5b3c202341 100644
> --- a/hw/pci-bridge/gen_pcie_root_port.c
> +++ b/hw/pci-bridge/gen_pcie_root_port.c
> @@ -100,6 +100,9 @@ static void gen_rp_realize(DeviceState *dev, Error **errp)
>      }
>  }
>
> +static const VMStateDescription vmstate_rp_deffered_unplug =
> +    VMSTATE_DEFFERED_UNPLUG("pcie-root-port");
> +
>  static const VMStateDescription vmstate_rp_dev = {
>      .name = "pcie-root-port",
>      .priority = MIG_PRI_PCI_BUS,
> @@ -114,6 +117,10 @@ static const VMStateDescription vmstate_rp_dev = {
>                            GenPCIERootPort,
>                            gen_rp_test_migrate_msix),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_rp_deffered_unplug,
> +        NULL
>      }
>  };
>
> diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
> index f1e16135a3..2399a9a87f 100644
> --- a/hw/pci-bridge/ioh3420.c
> +++ b/hw/pci-bridge/ioh3420.c
> @@ -82,6 +82,9 @@ static void ioh3420_interrupts_uninit(PCIDevice *d)
>      msi_uninit(d);
>  }
>
> +static const VMStateDescription vmstate_ioh3420_deffered_unplug =
> +    VMSTATE_DEFFERED_UNPLUG("ioh-3240-express-root-port");
> +
>  static const VMStateDescription vmstate_ioh3420 = {
>      .name = "ioh-3240-express-root-port",
>      .priority = MIG_PRI_PCI_BUS,
> @@ -93,6 +96,10 @@ static const VMStateDescription vmstate_ioh3420 = {
>          VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
>                         PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_ioh3420_deffered_unplug,
> +        NULL
>      }
>  };
>
> diff --git a/hw/pci-bridge/xio3130_downstream.c 
> b/hw/pci-bridge/xio3130_downstream.c
> index a9f084b863..a5b4fe08ee 100644
> --- a/hw/pci-bridge/xio3130_downstream.c
> +++ b/hw/pci-bridge/xio3130_downstream.c
> @@ -139,6 +139,9 @@ static Property xio3130_downstream_props[] = {
>      DEFINE_PROP_END_OF_LIST()
>  };
>
> +static const VMStateDescription vmstate_xio3130_downstream_deffered_unplug =
> +    VMSTATE_DEFFERED_UNPLUG("xio3130-express-downstream-port");
> +
>  static const VMStateDescription vmstate_xio3130_downstream = {
>      .name = "xio3130-express-downstream-port",
>      .priority = MIG_PRI_PCI_BUS,
> @@ -150,6 +153,10 @@ static const VMStateDescription 
> vmstate_xio3130_downstream = {
>          VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
>                         PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_xio3130_downstream_deffered_unplug,
> +        NULL
>      }
>  };
>
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 08718188bb..29f0e5c05b 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -28,6 +28,8 @@
>  #include "hw/pci/pcie_regs.h"
>  #include "hw/pci/pcie_port.h"
>  #include "qemu/range.h"
> +#include "sysemu/sysemu.h"
> +#include "migration/misc.h"
>
>  //#define DEBUG_PCIE
>  #ifdef DEBUG_PCIE
> @@ -575,6 +577,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
>
>      if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) {
>          /* Downstream ports enforce device number 0. */
> +        PCIESlot *slot = PCIE_SLOT(dev);
>          bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0];
>          uint16_t pic;
>
> @@ -588,6 +591,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
>
>          pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF;
>          pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic);
> +        slot->unplug_is_deferred = false;
>      }
>
>      pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
> @@ -608,13 +612,42 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t 
> *slt_ctl, uint16_t *slt_sta)
>      *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
>  }
>
> +static void pcie_cap_slot_unplug(PCIDevice *dev)
> +{
> +    uint32_t pos = dev->exp.exp_cap;
> +    uint8_t *exp_cap = dev->config + pos;
> +    PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
> +
> +    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                        pcie_unplug_device, NULL);
> +    pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, 
> PCI_EXP_SLTSTA_PDS);
> +    if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
> +        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
> +                                     PCI_EXP_LNKSTA_DLLLA);
> +    }
> +    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC);
> +    hotplug_event_notify(dev);
> +}
> +
> +void pcie_cap_slot_deferred_unplug(PCIDevice *dev)
> +{
> +    PCIESlot *slot = PCIE_SLOT(dev);
> +
> +    if (migration_is_idle() && slot->unplug_is_deferred) {
> +        pcie_cap_slot_unplug(dev);
> +        slot->unplug_is_deferred = false;
> +    }
> +}
> +
>  void pcie_cap_slot_write_config(PCIDevice *dev,
>                                  uint16_t old_slt_ctl, uint16_t old_slt_sta,
>                                  uint32_t addr, uint32_t val, int len)
>  {
> +    PCIESlot *slot = PCIE_SLOT(dev);
>      uint32_t pos = dev->exp.exp_cap;
>      uint8_t *exp_cap = dev->config + pos;
>      uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> +    bool may_unplug;
>
>      if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
>          /*
> @@ -660,22 +693,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
>       * this is a work around for guests that overwrite
>       * control of powered off slots before powering them on.
>       */
> -    if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
> -        (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
> +    may_unplug = (val & PCI_EXP_SLTCTL_PCC) &&
> +                 (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF;
> +    if (may_unplug && (sltsta & PCI_EXP_SLTSTA_PDS) &&
>          (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
>          (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
> -        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
> -        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> -                            pcie_unplug_device, NULL);
> -
> -        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
> -                                     PCI_EXP_SLTSTA_PDS);
> -        if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
> -            pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
> -                                         PCI_EXP_LNKSTA_DLLLA);
> +        slot->unplug_is_deferred = !migration_is_idle();
> +        if (!slot->unplug_is_deferred) {
> +            pcie_cap_slot_unplug(dev);
>          }
> -        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
> -                                       PCI_EXP_SLTSTA_PDC);
> +    } else if (!may_unplug) {
> +        slot->unplug_is_deferred = false;
>      }
>
>      hotplug_event_notify(dev);
> diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
> index c19a9be592..bd5fbf6827 100644
> --- a/hw/pci/pcie_port.c
> +++ b/hw/pci/pcie_port.c
> @@ -23,6 +23,9 @@
>  #include "hw/qdev-properties.h"
>  #include "qemu/module.h"
>  #include "hw/hotplug.h"
> +#include "sysemu/runstate.h"
> +#include "migration/migration.h"
> +#include "migration/misc.h"
>
>  void pcie_port_init_reg(PCIDevice *d)
>  {
> @@ -150,6 +153,48 @@ static Property pcie_slot_props[] = {
>      DEFINE_PROP_END_OF_LIST()
>  };
>
> +bool vmstate_deffered_unplug_needed(void *opaque)
> +{
> +    PCIESlot *slot = opaque;
> +
> +    return slot->unplug_is_deferred;
> +}
> +
> +static void pcie_slot_migration_notifier_cb(Notifier *notifier, void *data)
> +{
> +    PCIESlot *slot = container_of(notifier, PCIESlot, migration_notifier);
> +
> +    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
> +}
> +
> +static void pcie_slot_vm_state_change(void *opaque, int running, RunState 
> state)
> +{
> +    PCIESlot *slot = opaque;
> +
> +    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
> +}
> +
> +static void pcie_slot_init(Object *obj)
> +{
> +    PCIESlot *slot = PCIE_SLOT(obj);
> +
> +    slot->unplug_is_deferred = false;
> +    slot->migration_notifier = (Notifier) {
> +        .notify = pcie_slot_migration_notifier_cb
> +    };
> +    add_migration_state_change_notifier(&slot->migration_notifier);
> +    slot->vmstate_change =
> +        qemu_add_vm_change_state_handler(pcie_slot_vm_state_change, slot);
> +}
> +
> +static void pcie_slot_finalize(Object *obj)
> +{
> +    PCIESlot *slot = PCIE_SLOT(obj);
> +
> +    remove_migration_state_change_notifier(&slot->migration_notifier);
> +    qemu_del_vm_change_state_handler(slot->vmstate_change);
> +}
> +
>  static void pcie_slot_class_init(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
> @@ -166,6 +211,8 @@ static const TypeInfo pcie_slot_type_info = {
>      .name = TYPE_PCIE_SLOT,
>      .parent = TYPE_PCIE_PORT,
>      .instance_size = sizeof(PCIESlot),
> +    .instance_init = pcie_slot_init,
> +    .instance_finalize = pcie_slot_finalize,
>      .abstract = true,
>      .class_init = pcie_slot_class_init,
>      .interfaces = (InterfaceInfo[]) {
> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> index 7064875835..128f26199e 100644
> --- a/include/hw/pci/pcie.h
> +++ b/include/hw/pci/pcie.h
> @@ -110,6 +110,7 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, 
> uint16_t *slt_sta);
>  void pcie_cap_slot_write_config(PCIDevice *dev,
>                                  uint16_t old_slt_ctl, uint16_t old_slt_sta,
>                                  uint32_t addr, uint32_t val, int len);
> +void pcie_cap_slot_deferred_unplug(PCIDevice *dev);
>  int pcie_cap_slot_post_load(void *opaque, int version_id);
>  void pcie_cap_slot_push_attention_button(PCIDevice *dev);
>
> diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
> index 7515430087..32e45f0c89 100644
> --- a/include/hw/pci/pcie_port.h
> +++ b/include/hw/pci/pcie_port.h
> @@ -23,6 +23,9 @@
>
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/pci/pci_bus.h"
> +#include "qemu/notify.h"
> +#include "sysemu/sysemu.h"
> +#include "migration/vmstate.h"
>
>  #define TYPE_PCIE_PORT "pcie-port"
>  #define PCIE_PORT(obj) OBJECT_CHECK(PCIEPort, (obj), TYPE_PCIE_PORT)
> @@ -44,6 +47,10 @@ void pcie_port_init_reg(PCIDevice *d);
>  struct PCIESlot {
>      /*< private >*/
>      PCIEPort    parent_obj;
> +    bool        unplug_is_deferred;
> +    Notifier    migration_notifier;
> +    VMChangeStateEntry *vmstate_change;
> +
>      /*< public >*/
>
>      /* pci express switch port with slot */
> @@ -58,6 +65,19 @@ struct PCIESlot {
>      QLIST_ENTRY(PCIESlot) next;
>  };
>
> +bool vmstate_deffered_unplug_needed(void *opaque);
> +
> +#define VMSTATE_DEFFERED_UNPLUG(parent_section_name) {            \
> +    .name = parent_section_name "/deffered-unplug",               \
> +    .version_id = 1,                                              \
> +    .minimum_version_id = 1,                                      \
> +    .needed = vmstate_deffered_unplug_needed,                     \
> +    .fields = (VMStateField[]) {                                  \
> +        VMSTATE_BOOL(unplug_is_deferred, PCIESlot),               \
> +        VMSTATE_END_OF_LIST()                                     \
> +    }                                                             \
> +}
> +
>  void pcie_chassis_create(uint8_t chassis_number);
>  PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot);
>  int pcie_chassis_add_slot(struct PCIESlot *slot);
> --
> 2.24.1
>
>




reply via email to

[Prev in Thread] Current Thread [Next in Thread]