From: Michael S. Tsirkin
Subject: [Qemu-devel] Re: [PATCH] virtio: Use ioeventfd for virtqueue notify
Date: Tue, 19 Oct 2010 15:33:41 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

As a general comment, could you please try to split this patch
up to make it easier to review? I did a pass over it, but I
still do not understand it completely.

My main concern is that we add more state in the notifiers
which can easily get out of sync with their users.
If we absolutely need this state, let's at least document
the state machine and make the API for state transitions
more transparent.
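For example, a block comment along these lines next to the new enum
would already help (just a sketch of the transitions as I read them
from the patch, so please correct it where I got them wrong):

    /*
     * Host notifier states:
     *   DEASSIGNED - no ioeventfd bound; notifies take the normal exit path
     *   ASSIGNED   - ioeventfd bound and read by qemu
     *                (virtio_read_host_notifier)
     *   OFFLIMITS  - ioeventfd bound but polled by an external user (vhost),
     *                so virtio must not install its own fd handler
     *
     * Transitions:
     *   DEASSIGNED -> ASSIGNED   DRIVER_OK set and ioeventfds usable
     *   ASSIGNED   -> DEASSIGNED reset, or DRIVER_OK cleared
     *   ASSIGNED   -> OFFLIMITS  vhost takes over the notifier
     *   OFFLIMITS  -> ASSIGNED   vhost hands the notifier back
     * (with DEASSIGNED <-> OFFLIMITS also possible when ioeventfds
     * are limited)
     */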

On Thu, Sep 30, 2010 at 03:01:52PM +0100, Stefan Hajnoczi wrote:
> diff --git a/hw/vhost.c b/hw/vhost.c
> index 1b8624d..f127a07 100644
> --- a/hw/vhost.c
> +++ b/hw/vhost.c
> @@ -517,7 +517,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
>          goto fail_guest_notifier;
>      }
>  
> -    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
> +    r = virtio_set_host_notifier(vdev, idx, true);
>      if (r < 0) {
>          fprintf(stderr, "Error binding host notifier: %d\n", -r);
>          goto fail_host_notifier;
> @@ -539,7 +539,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
>  
>  fail_call:
>  fail_kick:
> -    vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
> +    virtio_set_host_notifier(vdev, idx, false);
>  fail_host_notifier:
>      vdev->binding->set_guest_notifier(vdev->binding_opaque, idx, false);
>  fail_guest_notifier:
> @@ -575,7 +575,7 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
>      }
>      assert (r >= 0);
>  
> -    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
> +    r = virtio_set_host_notifier(vdev, idx, false);
>      if (r < 0) {
>          fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
>          fflush(stderr);
> diff --git a/hw/virtio.c b/hw/virtio.c
> index fbef788..f075b3a 100644
> --- a/hw/virtio.c
> +++ b/hw/virtio.c
> @@ -16,6 +16,7 @@
>  #include "trace.h"
>  #include "virtio.h"
>  #include "sysemu.h"
> +#include "kvm.h"
>  
>  /* The alignment to use between consumer and producer parts of vring.
>   * x86 pagesize again. */
> @@ -77,6 +78,11 @@ struct VirtQueue
>      VirtIODevice *vdev;
>      EventNotifier guest_notifier;
>      EventNotifier host_notifier;
> +    enum {
> +        HOST_NOTIFIER_DEASSIGNED,   /* inactive */
> +        HOST_NOTIFIER_ASSIGNED,     /* active */
> +        HOST_NOTIFIER_OFFLIMITS,    /* active but outside our control */
> +    } host_notifier_state;

This state machine confuses me. Please note that users already
track notifier state and call set with assign/deassign correctly.
The comment does not help: what does 'outside our control' mean?
Whose control?

>  };
>  
>  /* virt queue functions */
> @@ -453,6 +459,93 @@ void virtio_update_irq(VirtIODevice *vdev)
>      virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
>  }
>  
> +/* Service virtqueue notify from a host notifier */
> +static void virtio_read_host_notifier(void *opaque)
> +{
> +    VirtQueue *vq = opaque;
> +    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> +    if (event_notifier_test_and_clear(notifier)) {
> +        if (vq->vring.desc) {
> +            vq->handle_output(vq->vdev, vq);
> +        }
> +    }
> +}
> +
> +/* Transition between host notifier states */
> +static int virtio_set_host_notifier_state(VirtIODevice *vdev, int n, int state)

Really unfortunate naming for these functions: we seem to have
about 4 of them starting with virtio_set_host_notifier*.

> +{
> +    VirtQueue *vq = &vdev->vq[n];
> +    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> +    int rc;
> +
> +    if (!kvm_enabled()) {
> +        return -ENOSYS;
> +    }

If this means that there's no need to do anything for non-KVM,
return 0 here.
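I.e. simply (assuming there is nothing KVM-specific to undo in
that case):

    if (!kvm_enabled()) {
        return 0;    /* no ioeventfd support: keep the normal notify path */
    }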

> +
> +    /* If the number of ioeventfds is limited, use them for vhost only */
> +    if (state == HOST_NOTIFIER_ASSIGNED && !kvm_has_many_iobus_devs()) {
> +        state = HOST_NOTIFIER_DEASSIGNED;
> +    }
> +
> +    /* Ignore if no state change */
> +    if (vq->host_notifier_state == state) {
> +        return 0;
> +    }
> +
> +    /* Disable read handler if transitioning away from assigned */
> +    if (vq->host_notifier_state == HOST_NOTIFIER_ASSIGNED) {
> +        qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, NULL);
> +    }
> +
> +    /* Toggle host notifier if transitioning to or from deassigned */
> +    if (state == HOST_NOTIFIER_DEASSIGNED ||
> +        vq->host_notifier_state == HOST_NOTIFIER_DEASSIGNED) {
> +        rc = vdev->binding->set_host_notifier(vdev->binding_opaque, n,
> +                state != HOST_NOTIFIER_DEASSIGNED);
> +        if (rc < 0) {
> +            return rc;
> +        }
> +    }
> +
> +    /* Enable read handler if transitioning to assigned */
> +    if (state == HOST_NOTIFIER_ASSIGNED) {
> +        qemu_set_fd_handler(event_notifier_get_fd(notifier),
> +                            virtio_read_host_notifier, NULL, vq);
> +    }
> +
> +    vq->host_notifier_state = state;
> +    return 0;
> +}
> +
> +/* Try to assign/deassign host notifiers for all virtqueues */
> +static void virtio_set_host_notifiers(VirtIODevice *vdev, bool assigned)

void? don't we care whether this fails?

> +{

This really confuses me. Why is this not a simple loop
over all vqs?

> +    int state = assigned ? HOST_NOTIFIER_ASSIGNED : HOST_NOTIFIER_DEASSIGNED;
> +    int i;
> +
> +    if (!vdev->binding->set_host_notifier) {
> +        return;
> +    }
> +
> +    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
> +        if (vdev->vq[i].host_notifier_state == HOST_NOTIFIER_OFFLIMITS) {
> +            continue;
> +        }
> +
> +        if (!vdev->vq[i].vring.desc) {
> +            continue;
> +        }
> +
> +        virtio_set_host_notifier_state(vdev, i, state);

ignores return value?

> +    }
> +}
> +
> +int virtio_set_host_notifier(VirtIODevice *vdev, int n, bool assigned)
> +{
> +    int state = assigned ? HOST_NOTIFIER_OFFLIMITS : HOST_NOTIFIER_ASSIGNED;

So here assigned == false sets state to ASSIGNED?


> +    return virtio_set_host_notifier_state(vdev, n, state);
> +}
> +
>  void virtio_reset(void *opaque)
>  {
>      VirtIODevice *vdev = opaque;
> @@ -467,6 +560,7 @@ void virtio_reset(void *opaque)
>      vdev->isr = 0;
>      vdev->config_vector = VIRTIO_NO_VECTOR;
>      virtio_notify_vector(vdev, vdev->config_vector);
> +    virtio_set_host_notifiers(vdev, false);
>  
>      for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
>          vdev->vq[i].vring.desc = 0;
> @@ -592,6 +686,16 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
>          vdev->vq[n].vector = vector;
>  }
>  
> +void virtio_set_status(VirtIODevice *vdev, uint8_t val)
> +{
> +    virtio_set_host_notifiers(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
> +
> +    if (vdev->set_status) {
> +        vdev->set_status(vdev, val);
> +    }
> +    vdev->status = val;

How does this interact with vhost.c, which uses both the notifiers
and the status callback?

> +}
> +
>  VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
>                              void (*handle_output)(VirtIODevice *, VirtQueue *))
>  {
> @@ -719,6 +823,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
>      }
>  
>      virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
> +    virtio_set_host_notifiers(vdev, vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
>      return 0;
>  }
>  
> @@ -746,6 +851,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
>      for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
>          vdev->vq[i].vector = VIRTIO_NO_VECTOR;
>          vdev->vq[i].vdev = vdev;
> +        vdev->vq[i].host_notifier_state = HOST_NOTIFIER_DEASSIGNED;
>      }
>  
>      vdev->name = name;
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 96514e6..d76157e 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -125,13 +125,7 @@ struct VirtIODevice
>      uint16_t device_id;
>  };
>  
> -static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val)
> -{
> -    if (vdev->set_status) {
> -        vdev->set_status(vdev, val);
> -    }
> -    vdev->status = val;
> -}
> +void virtio_set_status(VirtIODevice *vdev, uint8_t val);
>  
>  VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
>                              void (*handle_output)(VirtIODevice *,
> @@ -217,6 +211,7 @@ target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n);
>  uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
>  void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
>  VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
> +int virtio_set_host_notifier(VirtIODevice *vdev, int n, bool assigned);

I think it's not the best API. Two functions, e.g. _assign and
_deassign, would make it easier to avoid confusion.
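Something along these lines (rough sketch only, the names are made up,
pick whatever fits):

    /* vhost (or another external user) takes over the queue's eventfd */
    int virtio_queue_host_notifier_assign(VirtIODevice *vdev, int n)
    {
        return virtio_set_host_notifier_state(vdev, n, HOST_NOTIFIER_OFFLIMITS);
    }

    /* hand the eventfd back to the qemu fd handler */
    int virtio_queue_host_notifier_deassign(VirtIODevice *vdev, int n)
    {
        return virtio_set_host_notifier_state(vdev, n, HOST_NOTIFIER_ASSIGNED);
    }

That way vhost.c reads as assign/deassign and there is no bool whose
meaning is inverted on the way down.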

>  EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
>  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
>  void virtio_irq(VirtQueue *vq);
> diff --git a/kvm-all.c b/kvm-all.c
> index 1cc696f..2f09e34 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -24,6 +24,7 @@
>  #include "qemu-barrier.h"
>  #include "sysemu.h"
>  #include "hw/hw.h"
> +#include "hw/event_notifier.h"
>  #include "gdbstub.h"
>  #include "kvm.h"
>  #include "bswap.h"
> @@ -72,6 +73,7 @@ struct KVMState
>      int irqchip_in_kernel;
>      int pit_in_kernel;
>      int xsave, xcrs;
> +    int many_iobus_devs;
>  };
>  
>  static KVMState *kvm_state;
> @@ -423,6 +425,36 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
>      return ret;
>  }
>  
> +static int kvm_check_many_iobus_devs(void)
> +{
> +    /* Older kernels have a 6 device limit on the KVM io bus.  In that case
> +     * creating many ioeventfds must be avoided.  This tests 

This tests -> this test

>+        checks for the limitation.
> +     */
> +    EventNotifier notifiers[7];

This is low-level stuff; we really don't need to use notifiers here,
a simple eventfd will be enough.
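Rough idea (untested sketch, same logic as your loop but with plain
eventfds; needs <sys/eventfd.h> and <unistd.h>):

    static int kvm_check_many_iobus_devs(void)
    {
        int fds[7];
        int i, ret;

        for (i = 0; i < ARRAY_SIZE(fds); i++) {
            fds[i] = eventfd(0, 0);
            if (fds[i] < 0) {
                break;
            }
            ret = kvm_set_ioeventfd_pio_word(fds[i], 0, i, true);
            if (ret < 0) {
                close(fds[i]);
                break;
            }
        }

        /* If all of them were accepted, the io bus limit is not a problem. */
        ret = (i == ARRAY_SIZE(fds));

        while (i-- > 0) {
            kvm_set_ioeventfd_pio_word(fds[i], 0, i, false);
            close(fds[i]);
        }
        return ret;
    }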

> +    int i, ret = 0;
> +    for (i = 0; i < ARRAY_SIZE(notifiers); i++) {
> +        ret = event_notifier_init(&notifiers[i], 0);
> +        if (ret < 0) {
> +            break;
> +        }
> +        ret = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(&notifiers[i]), 0, i, true);
> +        if (ret < 0) {
> +            event_notifier_cleanup(&notifiers[i]);
> +            break;
> +        }
> +    }
> +
> +    /* Decide whether many devices are supported or not */
> +    ret = i == ARRAY_SIZE(notifiers);
> +
> +    while (i-- > 0) {
> +        kvm_set_ioeventfd_pio_word(event_notifier_get_fd(&notifiers[i]), 0, i, false);
> +        event_notifier_cleanup(&notifiers[i]);
> +    }
> +    return ret;
> +}
> +
>  static void kvm_set_phys_mem(target_phys_addr_t start_addr,
>                            ram_addr_t size,
>                            ram_addr_t phys_offset)
> @@ -699,6 +731,8 @@ int kvm_init(int smp_cpus)
>      kvm_state = s;
>      cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
>  
> +    s->many_iobus_devs = kvm_check_many_iobus_devs();
> +
>      return 0;
>  
>  err:
> @@ -1028,6 +1062,11 @@ int kvm_has_xcrs(void)
>      return kvm_state->xcrs;
>  }
>  
> +int kvm_has_many_iobus_devs(void)
> +{
> +    return kvm_state->many_iobus_devs;
> +}
> +
>  void kvm_setup_guest_memory(void *start, size_t size)
>  {
>      if (!kvm_has_sync_mmu()) {
> diff --git a/kvm-stub.c b/kvm-stub.c
> index d45f9fa..b0887fb 100644
> --- a/kvm-stub.c
> +++ b/kvm-stub.c
> @@ -99,6 +99,11 @@ int kvm_has_robust_singlestep(void)
>      return 0;
>  }
>  
> +int kvm_has_many_iobus_devs(void)
> +{
> +    return 0;
> +}
> +
>  void kvm_setup_guest_memory(void *start, size_t size)
>  {
>  }
> diff --git a/kvm.h b/kvm.h
> index 50b6c01..f405906 100644
> --- a/kvm.h
> +++ b/kvm.h
> @@ -42,6 +42,7 @@ int kvm_has_robust_singlestep(void);
>  int kvm_has_debugregs(void);
>  int kvm_has_xsave(void);
>  int kvm_has_xcrs(void);
> +int kvm_has_many_iobus_devs(void);

Looks like a pretty low-level API.
Name it 'kvm_has_ioeventfd' or something like this?

>  
>  #ifdef NEED_CPU_H
>  int kvm_init_vcpu(CPUState *env);
> -- 
> 1.7.1


