Re: [Qemu-devel] [PATCH v4] virtio-scsi: restart DMA after iothread


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH v4] virtio-scsi: restart DMA after iothread
Date: Wed, 12 Jun 2019 14:32:30 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.6.1

On 12/06/19 14:04, Stefan Hajnoczi wrote:
> When the 'cont' command resumes guest execution the vm change state
> handlers are invoked.  Unfortunately there is no explicit ordering
> between vm change state handlers.  When two layers of code both use vm
> change state handlers, we don't control which handler runs first.
> 
> virtio-scsi with iothreads hits a deadlock when a failed SCSI command is
> restarted and completes before the iothread is re-initialized.
> 
> This patch introduces priorities for VM change state handlers so the
> IOThread is guaranteed to be initialized before DMA requests are
> restarted.
> 
> Signed-off-by: Stefan Hajnoczi <address@hidden>
> ---
> v4:
> Paolo and Michael were interested in a priorities system.  Kevin wasn't
> convinced.  Here is a patch implementing the priorities approach so you
> can decide whether you prefer this or not.

I like this better than the others, but I do agree with Kevin that the
names aren't great and PRIO_IOTHREAD has nothing to do with iothreads
really.

Maybe the priority should simply be the "depth" of the device's bus: 2
for scsi, because we know it always has a controller between the device
and the machine, and 1 for everything else.  Who knows, maybe in the
future the vmstate_change handler could be moved into DeviceClass and
propagated across buses[1].
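
Just to illustrate the depth idea, a hypothetical version of the enum
(the names and the depth-0 slot are made up, not part of this patch):

enum {
    /* Low depths run first when the VM starts */
    VM_CHANGE_STATE_HANDLER_DEPTH_UNDEFINED = 0,
    VM_CHANGE_STATE_HANDLER_DEPTH_DEVICE    = 1, /* everything else */
    VM_CHANGE_STATE_HANDLER_DEPTH_DISK      = 2, /* behind a controller */
    /* High depths run first when the VM stops */
};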

So I vote for this patch but with VM_CHANGE_STATE_HANDLER_PRIO_IOTHREAD
renamed to VM_CHANGE_STATE_HANDLER_PRIO_DEVICE and
VM_CHANGE_STATE_HANDLER_PRIO_DEVICE renamed to
VM_CHANGE_STATE_HANDLER_PRIO_DISK (which is consistent with the naming
of scsi-disk, though not with ide-drive...).
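
Concretely, keeping the values from the patch, the renamed enum would
read:

enum {
    /* Low priorities run first when the VM starts */
    VM_CHANGE_STATE_HANDLER_PRIO_UNDEFINED = 0,
    VM_CHANGE_STATE_HANDLER_PRIO_DEVICE = 100,  /* was ..._PRIO_IOTHREAD */
    VM_CHANGE_STATE_HANDLER_PRIO_DISK = 200,    /* was ..._PRIO_DEVICE */
    /* High priorities run first when the VM stops */
};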

Paolo

[1] With care, because currently the initialization order for stop is
virtio-device -> virtio-pci -> scsi-bus (and the opposite for start).  I
am not sure that the order for virtio-pci and virtio-device could be
exchanged, as would be the case if you followed the bus order
pci->virtio->scsi.
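
To see the resulting start/stop ordering in isolation, here is a
standalone sketch in plain C (simplified from the patch below; the
handler names are made up).  It keeps the list sorted by ascending
priority, walks it forward on start and backward on stop:

#include <stdio.h>

typedef struct Handler {
    const char *name;
    int priority;
    struct Handler *prev, *next;
} Handler;

static Handler *head, *tail;

/* Keep the list sorted in ascending priority order, mirroring
 * qemu_add_vm_change_state_handler_prio() */
static void add_handler(Handler *e)
{
    Handler *other;

    for (other = head; other; other = other->next) {
        if (e->priority < other->priority) {
            /* Insert before the first entry with a higher priority */
            e->prev = other->prev;
            e->next = other;
            if (other->prev) {
                other->prev->next = e;
            } else {
                head = e;
            }
            other->prev = e;
            return;
        }
    }
    /* Highest priority so far: append at the tail */
    e->prev = tail;
    e->next = NULL;
    if (tail) {
        tail->next = e;
    } else {
        head = e;
    }
    tail = e;
}

int main(void)
{
    Handler iothread = { "iothread (prio 100)", 100 };
    Handler disk = { "disk (prio 200)", 200 };

    add_handler(&disk);
    add_handler(&iothread);

    printf("start:\n");                       /* iothread, then disk */
    for (Handler *e = head; e; e = e->next) {
        printf("  %s\n", e->name);
    }
    printf("stop:\n");                        /* disk, then iothread */
    for (Handler *e = tail; e; e = e->prev) {
        printf("  %s\n", e->name);
    }
    return 0;
}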

> The customer has now confirmed that the deadlock is fixed.  I have
> changed the Subject line from RFC to PATCH.
> 
>  include/sysemu/sysemu.h | 10 ++++++++++
>  hw/scsi/scsi-bus.c      |  6 ++++--
>  hw/virtio/virtio.c      |  6 ++++--
>  vl.c                    | 44 +++++++++++++++++++++++++++++++----------
>  4 files changed, 52 insertions(+), 14 deletions(-)
> 
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 61579ae71e..1a4db092c7 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -27,8 +27,18 @@ bool runstate_store(char *str, size_t size);
>  typedef struct vm_change_state_entry VMChangeStateEntry;
>  typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
>  
> +enum {
> +    /* Low priorities run first when the VM starts */
> +    VM_CHANGE_STATE_HANDLER_PRIO_UNDEFINED = 0,
> +    VM_CHANGE_STATE_HANDLER_PRIO_IOTHREAD = 100,
> +    VM_CHANGE_STATE_HANDLER_PRIO_DEVICE = 200,
> +    /* High priorities run first when the VM stops */
> +};
> +
>  VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
>                                                       void *opaque);
> +VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
> +        VMChangeStateHandler *cb, void *opaque, int priority);
>  void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
>  void vm_state_notify(int running, RunState state);
>  
> diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
> index c480553083..eda5b9a19e 100644
> --- a/hw/scsi/scsi-bus.c
> +++ b/hw/scsi/scsi-bus.c
> @@ -206,8 +206,10 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp)
>          error_propagate(errp, local_err);
>          return;
>      }
> -    dev->vmsentry = qemu_add_vm_change_state_handler(scsi_dma_restart_cb,
> -                                                     dev);
> +    dev->vmsentry = qemu_add_vm_change_state_handler_prio(
> +            scsi_dma_restart_cb,
> +            dev,
> +            VM_CHANGE_STATE_HANDLER_PRIO_DEVICE);
>  }
>  
>  static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp)
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 07f4a64b48..9256af587a 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -2354,8 +2354,10 @@ void virtio_init(VirtIODevice *vdev, const char *name,
>      } else {
>          vdev->config = NULL;
>      }
> -    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
> -                                                     vdev);
> +    vdev->vmstate = qemu_add_vm_change_state_handler_prio(
> +            virtio_vmstate_change,
> +            vdev,
> +            VM_CHANGE_STATE_HANDLER_PRIO_IOTHREAD);
>      vdev->device_endian = virtio_default_endian();
>      vdev->use_guest_notifier_mask = true;
>  }
> diff --git a/vl.c b/vl.c
> index cd1fbc4cdc..26c82063d2 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -1469,27 +1469,45 @@ static int machine_help_func(QemuOpts *opts, MachineState *machine)
>  struct vm_change_state_entry {
>      VMChangeStateHandler *cb;
>      void *opaque;
> -    QLIST_ENTRY (vm_change_state_entry) entries;
> +    QTAILQ_ENTRY (vm_change_state_entry) entries;
> +    int priority;
>  };
>  
> -static QLIST_HEAD(, vm_change_state_entry) vm_change_state_head;
> +static QTAILQ_HEAD(, vm_change_state_entry) vm_change_state_head;
>  
> -VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
> -                                                     void *opaque)
> +VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
> +        VMChangeStateHandler *cb, void *opaque, int priority)
>  {
>      VMChangeStateEntry *e;
> +    VMChangeStateEntry *other;
>  
>      e = g_malloc0(sizeof (*e));
> -
>      e->cb = cb;
>      e->opaque = opaque;
> -    QLIST_INSERT_HEAD(&vm_change_state_head, e, entries);
> +    e->priority = priority;
> +
> +    /* Keep list sorted in ascending priority order */
> +    QTAILQ_FOREACH(other, &vm_change_state_head, entries) {
> +        if (priority < other->priority) {
> +            QTAILQ_INSERT_BEFORE(other, e, entries);
> +            return e;
> +        }
> +    }
> +
> +    QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries);
>      return e;
>  }
>  
> +VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
> +                                                     void *opaque)
> +{
> +    return qemu_add_vm_change_state_handler_prio(cb, opaque,
> +            VM_CHANGE_STATE_HANDLER_PRIO_UNDEFINED);
> +}
> +
>  void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
>  {
> -    QLIST_REMOVE (e, entries);
> +    QTAILQ_REMOVE (&vm_change_state_head, e, entries);
>      g_free (e);
>  }
>  
> @@ -1499,8 +1517,14 @@ void vm_state_notify(int running, RunState state)
>  
>      trace_vm_state_notify(running, state, RunState_str(state));
>  
> -    QLIST_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
> -        e->cb(e->opaque, running, state);
> +    if (running) {
> +        QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
> +            e->cb(e->opaque, running, state);
> +        }
> +    } else {
> +        QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
> +            e->cb(e->opaque, running, state);
> +        }
>      }
>  }
>  
> @@ -3009,7 +3033,7 @@ int main(int argc, char **argv, char **envp)
>          exit(1);
>      }
>  
> -    QLIST_INIT (&vm_change_state_head);
> +    QTAILQ_INIT (&vm_change_state_head);
>      os_setup_early_signal_handling();
>  
>      cpu_option = NULL;
> 



