qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 17/31] vdpa: adapt vhost_ops callbacks to svq


From: Eugenio Perez Martin
Subject: Re: [PATCH 17/31] vdpa: adapt vhost_ops callbacks to svq
Date: Mon, 31 Jan 2022 19:58:40 +0100

On Sun, Jan 30, 2022 at 5:03 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2022/1/22 上午4:27, Eugenio Pérez 写道:
> > First half of the buffers forwarding part, preparing vhost-vdpa
> > callbacks to SVQ to offer it. QEMU cannot enable it at this moment, so
> > this is effectively dead code at the moment, but it helps to reduce
> > patch size.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >   hw/virtio/vhost-shadow-virtqueue.h |   2 +-
> >   hw/virtio/vhost-shadow-virtqueue.c |  21 ++++-
> >   hw/virtio/vhost-vdpa.c             | 133 ++++++++++++++++++++++++++---
> >   3 files changed, 143 insertions(+), 13 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> > b/hw/virtio/vhost-shadow-virtqueue.h
> > index 035207a469..39aef5ffdf 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -35,7 +35,7 @@ size_t vhost_svq_device_area_size(const 
> > VhostShadowVirtqueue *svq);
> >
> >   void vhost_svq_stop(VhostShadowVirtqueue *svq);
> >
> > -VhostShadowVirtqueue *vhost_svq_new(void);
> > +VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize);
> >
> >   void vhost_svq_free(VhostShadowVirtqueue *vq);
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index f129ec8395..7c168075d7 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -277,9 +277,17 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
> >   /**
> >    * Creates vhost shadow virtqueue, and instruct vhost device to use the 
> > shadow
> >    * methods and file descriptors.
> > + *
> > + * @qsize Shadow VirtQueue size
> > + *
> > + * Returns the new virtqueue or NULL.
> > + *
> > + * In case of error, reason is reported through error_report.
> >    */
> > -VhostShadowVirtqueue *vhost_svq_new(void)
> > +VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize)
> >   {
> > +    size_t desc_size = sizeof(vring_desc_t) * qsize;
> > +    size_t device_size, driver_size;
> >       g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 
> > 1);
> >       int r;
> >
> > @@ -300,6 +308,15 @@ VhostShadowVirtqueue *vhost_svq_new(void)
> >       /* Placeholder descriptor, it should be deleted at set_kick_fd */
> >       event_notifier_init_fd(&svq->svq_kick, INVALID_SVQ_KICK_FD);
> >
> > +    svq->vring.num = qsize;
>
>
> I wonder if this is the best. E.g some hardware can support up to 32K
> queue size. So this will probably end up with:
>
> 1) SVQ use 32K queue size
> 2) hardware queue uses 256
>

In that case SVQ vring queue size will be 32K and guest's vring can
negotiate any number with SVQ equal or less than 32K, including 256.
Is that what you mean?

If with hardware queues you mean guest's vring, not sure why it is
"probably 256". I'd say that in that case with the virtio-net kernel
driver the ring size will be the same as the device export, for
example, isn't it?

The implementation should support any combination of sizes, but the
ring size exposed to the guest is never bigger than hardware one.

> ? Or we SVQ can stick to 256 but this will this cause trouble if we want
> to add event index support?
>

I think we should not have any problem with event idx. If you mean
that the guest could mark more buffers available than SVQ vring's
size, that should not happen because there must be less entries in the
guest than SVQ.

But if I understood you correctly, a similar situation could happen if
a guest's contiguous buffer is scattered across many qemu's VA chunks.
Even if that would happen, the situation should be ok too: SVQ knows
the guest's avail idx and, if SVQ is full, it will continue forwarding
avail buffers when the device uses more buffers.

Does that make sense to you?

>
> > +    driver_size = vhost_svq_driver_area_size(svq);
> > +    device_size = vhost_svq_device_area_size(svq);
> > +    svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
> > +    svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
> > +    memset(svq->vring.desc, 0, driver_size);
> > +    svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
> > +    memset(svq->vring.used, 0, device_size);
> > +
> >       event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
> >       return g_steal_pointer(&svq);
> >
> > @@ -318,5 +335,7 @@ void vhost_svq_free(VhostShadowVirtqueue *vq)
> >       event_notifier_cleanup(&vq->hdev_kick);
> >       event_notifier_set_handler(&vq->hdev_call, NULL);
> >       event_notifier_cleanup(&vq->hdev_call);
> > +    qemu_vfree(vq->vring.desc);
> > +    qemu_vfree(vq->vring.used);
> >       g_free(vq);
> >   }
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 9d801cf907..53e14bafa0 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -641,20 +641,52 @@ static int vhost_vdpa_set_vring_addr(struct vhost_dev 
> > *dev,
> >       return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
> >   }
> >
> > -static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
> > -                                      struct vhost_vring_state *ring)
> > +static int vhost_vdpa_set_dev_vring_num(struct vhost_dev *dev,
> > +                                        struct vhost_vring_state *ring)
> >   {
> >       trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
> >       return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
> >   }
> >
> > -static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> > -                                       struct vhost_vring_state *ring)
> > +static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
> > +                                    struct vhost_vring_state *ring)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +
> > +    if (v->shadow_vqs_enabled) {
> > +        /*
> > +         * Vring num was set at device start. SVQ num is handled by 
> > VirtQueue
> > +         * code
> > +         */
> > +        return 0;
> > +    }
> > +
> > +    return vhost_vdpa_set_dev_vring_num(dev, ring);
> > +}
> > +
> > +static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
> > +                                         struct vhost_vring_state *ring)
> >   {
> >       trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
> >       return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
> >   }
> >
> > +static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> > +                                     struct vhost_vring_state *ring)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +
> > +    if (v->shadow_vqs_enabled) {
> > +        /*
> > +         * Vring base was set at device start. SVQ base is handled by 
> > VirtQueue
> > +         * code
> > +         */
> > +        return 0;
> > +    }
> > +
> > +    return vhost_vdpa_set_dev_vring_base(dev, ring);
> > +}
> > +
> >   static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
> >                                          struct vhost_vring_state *ring)
> >   {
> > @@ -784,8 +816,8 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, 
> > bool started)
> >       }
> >   }
> >
> > -static int vhost_vdpa_get_features(struct vhost_dev *dev,
> > -                                     uint64_t *features)
> > +static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
> > +                                       uint64_t *features)
> >   {
> >       int ret;
> >
> > @@ -794,15 +826,64 @@ static int vhost_vdpa_get_features(struct vhost_dev 
> > *dev,
> >       return ret;
> >   }
> >
> > +static int vhost_vdpa_get_features(struct vhost_dev *dev, uint64_t 
> > *features)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +    int ret = vhost_vdpa_get_dev_features(dev, features);
> > +
> > +    if (ret == 0 && v->shadow_vqs_enabled) {
> > +        /* Filter only features that SVQ can offer to guest */
> > +        vhost_svq_valid_guest_features(features);
> > +    }
>
>
> Sorry if I've asked before, I think it's sufficient to filter out the
> device features that we don't support during and fail the vhost
> initialization. Any reason we need do it again here?
>

On the contrary, if something needs to be asked that means that is not
clear enough :).

At initialization we validate that the device offers all the needed
features (ACCESS_PLATFORM, VERSION_1). We don't have the features
acknowledged by the guest at that point.

This is checking the written features by the guest. For example, we
accept _F_INDIRECT here, so the guest can write indirect descriptors
to SVQ, since qemu VirtQueue code is handling it for us. I've stayed
on the safe side and I've not included packed or event_idx, but I
might try to run with them. These would not have been accepted for the
device.

>
> > +
> > +    return ret;
> > +}
> > +
> >   static int vhost_vdpa_set_features(struct vhost_dev *dev,
> >                                      uint64_t features)
> >   {
> > +    struct vhost_vdpa *v = dev->opaque;
> >       int ret;
> >
> >       if (vhost_vdpa_one_time_request(dev)) {
> >           return 0;
> >       }
> >
> > +    if (v->shadow_vqs_enabled) {
> > +        uint64_t dev_features, svq_features, acked_features;
> > +        bool ok;
> > +
> > +        ret = vhost_vdpa_get_dev_features(dev, &dev_features);
> > +        if (ret != 0) {
> > +            error_report("Can't get vdpa device features, got (%d)", ret);
> > +            return ret;
> > +        }
> > +
> > +        svq_features = dev_features;
> > +        ok = vhost_svq_valid_device_features(&svq_features);
> > +        if (unlikely(!ok)) {
> > +            error_report("SVQ Invalid device feature flags, offer: 0x%"
> > +                         PRIx64", ok: 0x%"PRIx64, dev->features, 
> > svq_features);
> > +            return -1;
> > +        }
> > +
> > +        ok = vhost_svq_valid_guest_features(&features);
> > +        if (unlikely(!ok)) {
> > +            error_report(
> > +                "Invalid guest acked feature flag, acked: 0x%"
> > +                PRIx64", ok: 0x%"PRIx64, dev->acked_features, features);
> > +            return -1;
> > +        }
> > +
> > +        ok = vhost_svq_ack_guest_features(svq_features, features,
> > +                                          &acked_features);
> > +        if (unlikely(!ok)) {
> > +            return -1;
> > +        }
> > +
> > +        features = acked_features;
> > +    }
> > +
> >       trace_vhost_vdpa_set_features(dev, features);
> >       ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
> >       if (ret) {
> > @@ -822,13 +903,31 @@ static int vhost_vdpa_set_owner(struct vhost_dev *dev)
> >       return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
> >   }
> >
> > -static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
> > -                    struct vhost_vring_addr *addr, struct vhost_virtqueue 
> > *vq)
> > +static void vhost_vdpa_vq_get_guest_addr(struct vhost_vring_addr *addr,
> > +                                         struct vhost_virtqueue *vq)
> >   {
> > -    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> >       addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
> >       addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
> >       addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
> > +}
> > +
> > +static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
> > +                                  struct vhost_vring_addr *addr,
> > +                                  struct vhost_virtqueue *vq)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +
> > +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> > +
> > +    if (v->shadow_vqs_enabled) {
> > +        int idx = vhost_vdpa_get_vq_index(dev, addr->index);
> > +        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, idx);
> > +
> > +        vhost_svq_get_vring_addr(svq, addr);
> > +    } else {
> > +        vhost_vdpa_vq_get_guest_addr(addr, vq);
> > +    }
> > +
> >       trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
> >                                    addr->avail_user_addr, 
> > addr->used_user_addr);
> >       return 0;
> > @@ -849,6 +948,12 @@ static void vhost_psvq_free(gpointer svq)
> >       vhost_svq_free(svq);
> >   }
> >
> > +static int vhost_vdpa_get_max_queue_size(struct vhost_dev *dev,
> > +                                         uint16_t *qsize)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_VDPA_GET_VRING_NUM, qsize);
> > +}
> > +
> >   static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa 
> > *v,
> >                                  Error **errp)
> >   {
> > @@ -857,6 +962,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
> > struct vhost_vdpa *v,
> >                                                              
> > vhost_psvq_free);
> >       uint64_t dev_features;
> >       uint64_t svq_features;
> > +    uint16_t qsize;
> >       int r;
> >       bool ok;
> >
> > @@ -864,7 +970,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
> > struct vhost_vdpa *v,
> >           goto out;
> >       }
> >
> > -    r = vhost_vdpa_get_features(hdev, &dev_features);
> > +    r = vhost_vdpa_get_dev_features(hdev, &dev_features);
> >       if (r != 0) {
> >           error_setg(errp, "Can't get vdpa device features, got (%d)", r);
> >           return r;
> > @@ -879,9 +985,14 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
> > struct vhost_vdpa *v,
> >           return -1;
> >       }
> >
> > +    r = vhost_vdpa_get_max_queue_size(hdev, &qsize);
> > +    if (unlikely(r)) {
> > +        qsize = 256;
> > +    }
>
>
> Should we fail instead of having a "default" value here?
>

Maybe it is better to fail here, yes. I guess there is no safe default value.

Thanks!

> Thanks
>
>
> > +
> >       shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_psvq_free);
> >       for (unsigned n = 0; n < hdev->nvqs; ++n) {
> > -        VhostShadowVirtqueue *svq = vhost_svq_new();
> > +        VhostShadowVirtqueue *svq = vhost_svq_new(qsize);
> >
> >           if (unlikely(!svq)) {
> >               error_setg(errp, "Cannot create svq %u", n);
>




reply via email to

[Prev in Thread] Current Thread [Next in Thread]