[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v4 3/5] vhost-user block device backend server
From: |
Kevin Wolf |
Subject: |
Re: [PATCH v4 3/5] vhost-user block device backend server |
Date: |
Tue, 25 Feb 2020 17:09:58 +0100 |
User-agent: |
Mutt/1.12.1 (2019-06-15) |
Am 18.02.2020 um 06:07 hat Coiby Xu geschrieben:
> By making use of libvhost, multiple block device drives can be exported
> and each drive can serve multiple clients simultaneously.
> Since vhost-user-server needs a block drive to be created first, delay
> the creation of this object.
>
> Signed-off-by: Coiby Xu <address@hidden>
> ---
> Makefile.target | 1 +
> backends/Makefile.objs | 2 +
> backends/vhost-user-blk-server.c | 718 +++++++++++++++++++++++++++++++
> backends/vhost-user-blk-server.h | 21 +
> vl.c | 4 +
> 5 files changed, 746 insertions(+)
> create mode 100644 backends/vhost-user-blk-server.c
> create mode 100644 backends/vhost-user-blk-server.h
>
> diff --git a/Makefile.target b/Makefile.target
> index 6e61f607b1..8c6c01eb3a 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -159,6 +159,7 @@ obj-y += monitor/
> obj-y += qapi/
> obj-y += memory.o
> obj-y += memory_mapping.o
> +obj-$(CONFIG_LINUX) += ../contrib/libvhost-user/libvhost-user.o
> obj-y += migration/ram.o
> LIBS := $(libs_softmmu) $(LIBS)
>
> diff --git a/backends/Makefile.objs b/backends/Makefile.objs
> index 28a847cd57..4e7be731e0 100644
> --- a/backends/Makefile.objs
> +++ b/backends/Makefile.objs
> @@ -14,6 +14,8 @@ common-obj-y += cryptodev-vhost.o
> common-obj-$(CONFIG_VHOST_CRYPTO) += cryptodev-vhost-user.o
> endif
>
> +common-obj-$(CONFIG_LINUX) += vhost-user-blk-server.o
> +
> common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o
>
> common-obj-$(CONFIG_LINUX) += hostmem-memfd.o
> diff --git a/backends/vhost-user-blk-server.c
> b/backends/vhost-user-blk-server.c
> new file mode 100644
> index 0000000000..1bf7f7b544
> --- /dev/null
> +++ b/backends/vhost-user-blk-server.c
Please move this to block/export/vhost-user-blk.c. This will
automatically have it covered as a block layer component in MAINTAINERS,
and it will provide a place where other exports can be put, too.
> @@ -0,0 +1,718 @@
> +/*
> + * Sharing QEMU block devices via vhost-user protocal
> + *
> + * Author: Coiby Xu <address@hidden>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later. See the COPYING file in the top-level directory.
> + */
> +#include "qemu/osdep.h"
> +#include "block/block.h"
> +#include "vhost-user-blk-server.h"
> +#include "qapi/error.h"
> +#include "qom/object_interfaces.h"
> +#include "sysemu/block-backend.h"
> +
> +enum {
> + VHOST_USER_BLK_MAX_QUEUES = 1,
> +};
> +struct virtio_blk_inhdr {
> + unsigned char status;
> +};
> +
> +static QTAILQ_HEAD(, VuBlockDev) vu_block_devs =
> + QTAILQ_HEAD_INITIALIZER(vu_block_devs);
> +
> +
> +typedef struct VuBlockReq {
> + VuVirtqElement *elem;
> + int64_t sector_num;
> + size_t size;
> + struct virtio_blk_inhdr *in;
> + struct virtio_blk_outhdr out;
> + VuClient *client;
> + struct VuVirtq *vq;
> +} VuBlockReq;
> +
> +
> +static void vu_block_req_complete(VuBlockReq *req)
> +{
> + VuDev *vu_dev = &req->client->parent;
> +
> + /* IO size with 1 extra status byte */
> + vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
> + vu_queue_notify(vu_dev, req->vq);
> +
> + if (req->elem) {
> + free(req->elem);
> + }
> +
> + g_free(req);
> +}
> +
> +static VuBlockDev *get_vu_block_device_by_client(VuClient *client)
> +{
> + return container_of(client->server->ptr_in_device, VuBlockDev,
> vu_server);
> +}
> +
> +static int coroutine_fn
> +vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
> + uint32_t iovcnt, uint32_t type)
> +{
> + struct virtio_blk_discard_write_zeroes desc;
> + ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
> + if (unlikely(size != sizeof(desc))) {
> + error_report("Invalid size %ld, expect %ld", size, sizeof(desc));
> + return -EINVAL;
> + }
> +
> + VuBlockDev *vdev_blk = get_vu_block_device_by_client(req->client);
> + uint64_t range[2] = { le64toh(desc.sector) << 9,
> + le32toh(desc.num_sectors) << 9 };
> + if (type == VIRTIO_BLK_T_DISCARD) {
> + if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
> + return 0;
> + }
> + } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
> + if (blk_co_pwrite_zeroes(vdev_blk->backend,
> + range[0], range[1], 0) == 0) {
> + return 0;
> + }
> + }
> +
> + return -EINVAL;
> +}
> +
> +
> +static void coroutine_fn vu_block_flush(VuBlockReq *req)
> +{
> + VuBlockDev *vdev_blk = get_vu_block_device_by_client(req->client);
> + BlockBackend *backend = vdev_blk->backend;
> + blk_co_flush(backend);
> +}
> +
> +
> +static int coroutine_fn vu_block_virtio_process_req(VuClient *client,
> + VuVirtq *vq)
> +{
> + VuDev *vu_dev = &client->parent;
> + VuVirtqElement *elem;
> + uint32_t type;
> + VuBlockReq *req;
> +
> + VuBlockDev *vdev_blk = get_vu_block_device_by_client(client);
> + BlockBackend *backend = vdev_blk->backend;
> + elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) +
> + sizeof(VuBlockReq));
> + if (!elem) {
> + return -1;
> + }
> +
> + struct iovec *in_iov = elem->in_sg;
> + struct iovec *out_iov = elem->out_sg;
> + unsigned in_num = elem->in_num;
> + unsigned out_num = elem->out_num;
> + /* refer to hw/block/virtio_blk.c */
> + if (elem->out_num < 1 || elem->in_num < 1) {
> + error_report("virtio-blk request missing headers");
> + free(elem);
> + return -EINVAL;
> + }
> +
> + req = g_new0(VuBlockReq, 1);
> + req->client = client;
> + req->vq = vq;
> + req->elem = elem;
You can keep req on the stack, it will be freed before this function
returns.
> + if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
> + sizeof(req->out)) != sizeof(req->out))) {
> + error_report("virtio-blk request outhdr too short");
> + goto err;
> + }
> +
> + iov_discard_front(&out_iov, &out_num, sizeof(req->out));
> +
> + if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
> + error_report("virtio-blk request inhdr too short");
> + goto err;
> + }
> +
> + /* We always touch the last byte, so just see how big in_iov is. */
> + req->in = (void *)in_iov[in_num - 1].iov_base
> + + in_iov[in_num - 1].iov_len
> + - sizeof(struct virtio_blk_inhdr);
> + iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
> +
> +
> + type = le32toh(req->out.type);
> + switch (type & ~VIRTIO_BLK_T_BARRIER) {
> + case VIRTIO_BLK_T_IN:
> + case VIRTIO_BLK_T_OUT: {
> + ssize_t ret = 0;
> + bool is_write = type & VIRTIO_BLK_T_OUT;
> + req->sector_num = le64toh(req->out.sector);
> +
> + int64_t offset = req->sector_num * vdev_blk->blk_size;
> + QEMUIOVector *qiov = g_new0(QEMUIOVector, 1);
> + if (is_write) {
> + qemu_iovec_init_external(qiov, out_iov, out_num);
> + ret = blk_co_pwritev(backend, offset, qiov->size,
> + qiov, 0);
> + } else {
> + qemu_iovec_init_external(qiov, in_iov, in_num);
> + ret = blk_co_preadv(backend, offset, qiov->size,
> + qiov, 0);
> + }
> + aio_wait_kick();
What purpose does this aio_wait_kick() serve? Usually you do this if you
want to signal completion fo something to a different thread, but I
don't see that we changed any control state that could be visible to
other threads.
> + if (ret >= 0) {
> + req->in->status = VIRTIO_BLK_S_OK;
> + } else {
> + req->in->status = VIRTIO_BLK_S_IOERR;
> + }
> + vu_block_req_complete(req);
> + break;
> + }
> + case VIRTIO_BLK_T_FLUSH:
> + vu_block_flush(req);
> + req->in->status = VIRTIO_BLK_S_OK;
> + vu_block_req_complete(req);
> + break;
> + case VIRTIO_BLK_T_GET_ID: {
> + size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
> + VIRTIO_BLK_ID_BYTES);
> + snprintf(elem->in_sg[0].iov_base, size, "%s",
> "vhost_user_blk_server");
> + req->in->status = VIRTIO_BLK_S_OK;
> + req->size = elem->in_sg[0].iov_len;
> + vu_block_req_complete(req);
> + break;
> + }
> + case VIRTIO_BLK_T_DISCARD:
> + case VIRTIO_BLK_T_WRITE_ZEROES: {
> + int rc;
> + rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1],
> + out_num, type);
> + if (rc == 0) {
> + req->in->status = VIRTIO_BLK_S_OK;
> + } else {
> + req->in->status = VIRTIO_BLK_S_IOERR;
> + }
> + vu_block_req_complete(req);
> + break;
> + }
> + default:
> + req->in->status = VIRTIO_BLK_S_UNSUPP;
> + vu_block_req_complete(req);
> + break;
> + }
You have vu_block_req_complete() as the last thing in every branch. You
could have it just once after the switch block.
> + return 0;
> +
> +err:
> + free(elem);
> + g_free(req);
> + return -EINVAL;
> +}
Okay, so vu_block_virtio_process_req() takes a single request from the
virtqueue and processes it. This makes sense as a building block, but it
doesn't allow parallelism yet. I expect to see the creation of multiple
coroutines below that can run in parallel.
> +
> +static void vu_block_process_vq(VuDev *vu_dev, int idx)
> +{
> + VuClient *client;
> + VuVirtq *vq;
> + int ret;
> +
> + client = container_of(vu_dev, VuClient, parent);
> + assert(client);
> +
> + vq = vu_get_queue(vu_dev, idx);
> + assert(vq);
> +
> + while (1) {
> + ret = vu_block_virtio_process_req(client, vq);
If you call vu_block_virtio_process_req(), then this one need to be a
coroutine_fn, too.
> + if (ret) {
> + break;
> + }
> + }
> +}
> +
> +static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started)
> +{
> + VuVirtq *vq;
> +
> + assert(vu_dev);
> +
> + vq = vu_get_queue(vu_dev, idx);
> + vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL);
> +}
If vu_block_process_vq() is used as a vq->handler, will it not be called
outside coroutine context? How can this work when
vu_block_virtio_process_req() required to be run in coroutine context?
Another problem is that vu_block_process_vq() runs a loop as long as
there are requests that aren't completed yet. During this time,
vu_kick_cb() will block. I don't think this is what was intended.
Instead, we should start the requests (without waiting for their
completion) and return.
> diff --git a/vl.c b/vl.c
> index 794f2e5733..0332ab70da 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2538,6 +2538,10 @@ static bool object_create_initial(const char *type,
> QemuOpts *opts)
> }
> #endif
>
> + /* Reason: vhost-user-server property "name" */
"node-name" now.
> + if (g_str_equal(type, "vhost-user-blk-server")) {
> + return false;
> + }
> /*
> * Reason: filter-* property "netdev" etc.
> */
Kevin
- [PATCH v4 0/5] vhost-user block device backend implementation, Coiby Xu, 2020/02/18
- [PATCH v4 1/5] extend libvhost to support IOThread and coroutine, Coiby Xu, 2020/02/18
- [PATCH v4 2/5] generic vhost user server, Coiby Xu, 2020/02/18
- [PATCH v4 3/5] vhost-user block device backend server, Coiby Xu, 2020/02/18
- Re: [PATCH v4 3/5] vhost-user block device backend server,
Kevin Wolf <=
- [PATCH v4 4/5] a standone-alone tool to directly share disk image file via vhost-user protocol, Coiby Xu, 2020/02/18
- [PATCH v4 5/5] new qTest case to test the vhost-user-blk-server, Coiby Xu, 2020/02/18
- Re: [PATCH v4 0/5] vhost-user block device backend implementation, Stefan Hajnoczi, 2020/02/19
- Re: [PATCH v4 0/5] vhost-user block device backend implementation, Coiby Xu, 2020/02/26
- Re: [PATCH v4 0/5] vhost-user block device backend implementation, Stefan Hajnoczi, 2020/02/27
- Re: [PATCH v4 0/5] vhost-user block device backend implementation, Coiby Xu, 2020/02/27
- Re: [PATCH v4 0/5] vhost-user block device backend implementation, Kevin Wolf, 2020/02/27
- Re: [PATCH v4 0/5] vhost-user block device backend implementation, Coiby Xu, 2020/02/27
- Re: [PATCH v4 0/5] vhost-user block device backend implementation, Kevin Wolf, 2020/02/27
- Re: [PATCH v4 0/5] vhost-user block device backend implementation, Marc-André Lureau, 2020/02/27