Re: [PATCH v2 2/2] virtio-blk: add zoned storage emulation for zoned dev

qemu-block
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 2/2] virtio-blk: add zoned storage emulation for zoned dev

From:	Sam Li
Subject:	Re: [PATCH v2 2/2] virtio-blk: add zoned storage emulation for zoned devices
Date:	Sun, 9 Oct 2022 10:38:34 +0800
Sam Li <faithilikerun@gmail.com> 于2022年10月9日周日 09:54写道：
>
> Stefan Hajnoczi <stefanha@redhat.com> 于2022年10月6日周四 23:04写道：
> >
> > On Thu, Sep 29, 2022 at 05:48:21PM +0800, Sam Li wrote:
> > > This patch extends virtio-blk emulation to handle zoned device commands
> > > by calling the new block layer APIs to perform zoned device I/O on
> > > > behalf of the guest. It supports Report Zone, four zone operations (open,
> > > close, finish, reset), and Append Zone.
> > >
> > > > The VIRTIO_BLK_F_ZONED feature bit will only be set if the host
> > > > supports zoned block devices. For regular block devices (conventional
> > > > zones) the feature bit will not be set.
> > >
> > > > A guest OS with zoned device support can use blkzone(8) to test these
> > > > commands. Furthermore, zonefs can be used to test zone append writes.
> > >
> > > Signed-off-by: Sam Li <faithilikerun@gmail.com>
> > > ---
> > >  hw/block/virtio-blk.c | 393 ++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 393 insertions(+)
> > >
> > > diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> > > index e9ba752f6b..1c2535bfeb 100644
> > > --- a/hw/block/virtio-blk.c
> > > +++ b/hw/block/virtio-blk.c
> > > @@ -26,6 +26,9 @@
> > >  #include "hw/virtio/virtio-blk.h"
> > >  #include "dataplane/virtio-blk.h"
> > >  #include "scsi/constants.h"
> > > +#if defined(CONFIG_BLKZONED)
> > > +#include <linux/blkzoned.h>
> > > +#endif
> >
> > Why is this Linux-specific header file included? The virtio-blk
> > emulation code should only use QEMU block layer APIs, not Linux APIs.
> >
> > >  #ifdef __linux__
> > >  # include <scsi/sg.h>
> > >  #endif
> > > @@ -46,6 +49,8 @@ static const VirtIOFeature feature_sizes[] = {
> > >       .end = endof(struct virtio_blk_config, discard_sector_alignment)},
> > >      {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES,
> > >       .end = endof(struct virtio_blk_config, write_zeroes_may_unmap)},
> > > +    {.flags = 1ULL << VIRTIO_BLK_F_ZONED,
> > > +     .end = endof(struct virtio_blk_config, zoned)},
> > >      {}
> > >  };
> > >
> > > @@ -614,6 +619,340 @@ err:
> > >      return err_status;
> > >  }
> > >
> > > +typedef struct ZoneCmdData {
> > > +    VirtIOBlockReq *req;
> > > +    union {
> > > +        struct {
> > > +            unsigned int nr_zones;
> > > +            BlockZoneDescriptor *zones;
> > > +        } zone_report_data;
> > > +        struct {
> > > +            int64_t append_sector;
> > > +        } zone_append_data;
> > > +    };
> > > +} ZoneCmdData;
> > > +
> > > +/*
> > > + * check zoned_request: error checking before issuing requests. If all 
> > > checks
> > > + * passed, return true.
> > > + * append: true if only zone append requests issued.
> > > + */
> > > +static bool check_zoned_request(VirtIOBlock *s, int64_t offset, int64_t 
> > > len,
> > > +                             bool append, uint8_t *status) {
> > > +    BlockDriverState *bs = blk_bs(s->blk);
> > > +    int index = offset / bs->bl.zone_size;
> >
> > This function doesn't check that offset+len is in the same zone as
> > offset. Maybe that's correct because some request types allow [offset,
> > offset+len) to cross zones?
>
> Yes, zone_mgmt requests should allow that.
>
> >
> > > +
> > > +    if (offset < 0 || offset + len > bs->bl.capacity) {
> >
> > Other cases that are not checked:
> > 1. len < 0
> > 2. offset >= bs->bl.capacity
> > 3. len > bs->bl.capacity - offset (catches integer overflow)
> >
> > It may be possible to combine these cases, but be careful about integer
> > overflow.
>
> Right. Combining above cases:
>
> if (offset < 0 || len < 0 || offset > cap - len)
>
> offset > cap - len covers cases #2 and #3: since len is non-negative,
> any offset that is greater than cap is also greater than cap - len, so
> it is rejected as well. Unlike offset + len, cap - len cannot overflow.
>
> >
> > > +        *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +        return false;
> > > +    }
> > > +
> > > +    if (!virtio_has_feature(s->host_features, VIRTIO_BLK_F_ZONED)) {
> > > +        *status = VIRTIO_BLK_S_UNSUPP;
> > > +        return false;
> > > +    }
> > > +
> > > +    if (append) {
> > > +        if ((offset % bs->bl.write_granularity) != 0) {
> > > +            *status = VIRTIO_BLK_S_ZONE_UNALIGNED_WP;
> > > +            return false;
> > > +        }
> > > +
> > > +        if (!BDRV_ZT_IS_SWR(bs->bl.wps->wp[index])) {
> > > +            *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +            return false;
> > > +        }
> >
> > Where does the virtio-blk zone spec say that only SWR zones allow zone
> > append commands? Should it work for SWP zones too?
>
> The spec does not say that it works for SWP zones (see the quote
> below), but it should work for them too. I'll change this check to
> reject only conventional zones instead.
>
> +If the zone specified by the VIRTIO_BLK_T_ZONE_APPEND request is not
> a SWR zone,
> +then the request SHALL be completed with VIRTIO_BLK_S_ZONE_INVALID_CMD
> +\field{status}.
>
> >
> > > +
> > > +        if (len / 512 > bs->bl.max_append_sectors) {
> > > +            if (bs->bl.max_append_sectors == 0) {
> > > +                *status = VIRTIO_BLK_S_UNSUPP;
> > > +            } else {
> > > +                *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +            }
> > > +            return false;
> > > +        }
> > > +    }
> > > +    return true;
> > > +}
> > > +
> > > +static void virtio_blk_zone_report_complete(void *opaque, int ret)
> > > +{
> > > +    ZoneCmdData *data = opaque;
> > > +    VirtIOBlockReq *req = data->req;
> > > +    VirtIOBlock *s = req->dev;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
> > > +    struct iovec *in_iov = req->elem.in_sg;
> > > +    unsigned in_num = req->elem.in_num;
> > > +    int64_t zrp_size, nz, n, j = 0;
> > > +    int8_t err_status = VIRTIO_BLK_S_OK;
> > > +
> > > +    if (ret) {
> > > +        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +        goto out;
> > > +    }
> > > +
> > > +    nz = data->zone_report_data.nr_zones;
> > > +    struct virtio_blk_zone_report zrp_hdr = (struct 
> > > virtio_blk_zone_report) {
> > > +            .nr_zones = cpu_to_le64(nz),
> > > +    };
> > > +
> > > +    zrp_size = sizeof(struct virtio_blk_zone_report)
> > > +               + sizeof(struct virtio_blk_zone_descriptor) * nz;
> > > +    n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr));
> > > +    if (n != sizeof(zrp_hdr)) {
> > > +        virtio_error(vdev, "Driver provided intput buffer that is too 
> > > small!");
> > > +        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +        goto out;
> > > +    }
> > > +
> > > +    for (size_t i = sizeof(zrp_hdr); i < zrp_size; i += sizeof(struct 
> > > virtio_blk_zone_descriptor), ++j) {
> > > +        struct virtio_blk_zone_descriptor desc =
> > > +                (struct virtio_blk_zone_descriptor) {
> > > +                        .z_start = 
> > > cpu_to_le64(data->zone_report_data.zones[j].start) >> BDRV_SECTOR_BITS,
> > > +                        .z_cap = 
> > > cpu_to_le64(data->zone_report_data.zones[j].cap) >> BDRV_SECTOR_BITS,
> > > +                        .z_wp = 
> > > cpu_to_le64(data->zone_report_data.zones[j].wp) >> BDRV_SECTOR_BITS,
> > > +                };
> > > +
> > > +        switch (data->zone_report_data.zones[j].type) {
> > > +        case BLK_ZT_CONV:
> > > +            desc.z_type = BLK_ZONE_TYPE_CONVENTIONAL;
> > > +            break;
> > > +        case BLK_ZT_SWR:
> > > +            desc.z_type = BLK_ZONE_TYPE_SEQWRITE_REQ;
> > > +            break;
> > > +        case BLK_ZT_SWP:
> > > +            desc.z_type = BLK_ZONE_TYPE_SEQWRITE_PREF;
> > > +            break;
> > > +        default:
> > > +            g_assert_not_reached();
> > > +        }
> > > +
> > > +        switch (data->zone_report_data.zones[j].cond) {
> > > +        case BLK_ZS_RDONLY:
> > > +            desc.z_state = BLK_ZONE_COND_READONLY;
> > > +            break;
> > > +        case BLK_ZS_OFFLINE:
> > > +            desc.z_state = BLK_ZONE_COND_OFFLINE;
> > > +            break;
> > > +        case BLK_ZS_EMPTY:
> > > +            desc.z_state = BLK_ZONE_COND_EMPTY;
> > > +            break;
> > > +        case BLK_ZS_CLOSED:
> > > +            desc.z_state = BLK_ZONE_COND_CLOSED;
> > > +            break;
> > > +        case BLK_ZS_FULL:
> > > +            desc.z_state = BLK_ZONE_COND_FULL;
> > > +            break;
> > > +        case BLK_ZS_EOPEN:
> > > +            desc.z_state = BLK_ZONE_COND_EXP_OPEN;
> > > +            break;
> > > +        case BLK_ZS_IOPEN:
> > > +            desc.z_state = BLK_ZONE_COND_IMP_OPEN;
> > > +            break;
> > > +        case BLK_ZS_NOT_WP:
> > > +            desc.z_state = BLK_ZONE_COND_NOT_WP;
> > > +            break;
> > > +        default:
> > > +            g_assert_not_reached();
> > > +            break;
> > > +        }
> > > +
> > > +        /* TODO: it takes O(n^2) time complexity. Optimizations required 
> > > here. */
> > > +        n = iov_from_buf(in_iov, in_num, i, &desc, sizeof(desc));
> > > +        if (n != sizeof(desc)) {
> > > +            virtio_error(vdev, "Driver provided input buffer "
> > > +                               "for descriptors that is too small!");
> > > +            err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +            goto out;
> > > +        }
> > > +    }
> > > +    goto out;
> > > +
> > > +out:
> > > +    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
> > > +    virtio_blk_req_complete(req, err_status);
> > > +    virtio_blk_free_request(req);
> > > +    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
> > > +    g_free(data->zone_report_data.zones);
> > > +    g_free(data);
> > > +}
> > > +
> > > +static int virtio_blk_handle_zone_report(VirtIOBlockReq *req) {
> > > +    VirtIOBlock *s = req->dev;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> > > +    unsigned int nr_zones;
> > > +    ZoneCmdData *data;
> > > +    int64_t zone_size, offset;
> > > +    uint8_t err_status;
> > > +
> > > +    if (req->in_len < sizeof(struct virtio_blk_inhdr) +
> > > +            sizeof(struct virtio_blk_zone_report) +
> > > +            sizeof(struct virtio_blk_zone_descriptor)) {
> > > +        virtio_error(vdev, "in buffer too small for zone report");
> > > +        return -1;
> > > +    }
> > > +
> > > +    /* start byte offset of the zone report */
> > > +    offset = virtio_ldq_p(vdev, &req->out.sector) * 512;
> > > +    if (!check_zoned_request(s, offset, 0, false, &err_status)) {
> > > +        goto out;
> > > +    }
> > > +
> > > +    nr_zones = (req->in_len - sizeof(struct virtio_blk_inhdr) -
> > > +                sizeof(struct virtio_blk_zone_report)) /
> > > +               sizeof(struct virtio_blk_zone_descriptor);
> > > +
> > > +    zone_size = sizeof(BlockZoneDescriptor) * nr_zones;
> > > +    data = g_malloc(sizeof(ZoneCmdData));
> > > +    data->req = req;
> > > +    data->zone_report_data.nr_zones = nr_zones;
> > > +    data->zone_report_data.zones = g_malloc(zone_size),
> > > +
> > > +    blk_aio_zone_report(s->blk, offset, &data->zone_report_data.nr_zones,
> > > +                        data->zone_report_data.zones,
> > > +                        virtio_blk_zone_report_complete, data);
> > > +    return 0;
> > > +
> > > +out:
> > > +    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
> > > +    virtio_blk_req_complete(req, err_status);
> > > +    virtio_blk_free_request(req);
> > > +    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
> > > +    return err_status;
> > > +}
> > > +
> > > +static void virtio_blk_zone_mgmt_complete(void *opaque, int ret) {
> > > +    ZoneCmdData *data = opaque;
> > > +    VirtIOBlockReq *req = data->req;
> > > +    VirtIOBlock *s = req->dev;
> > > +    int8_t err_status = VIRTIO_BLK_S_OK;
> > > +
> > > +    if (ret) {
> > > +        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +        goto out;
> > > +    }
> > > +    goto out;
> > > +
> > > +out:
> > > +    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
> > > +    virtio_blk_req_complete(req, err_status);
> > > +    virtio_blk_free_request(req);
> > > +    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
> > > +    g_free(data);
> > > +}
> > > +
> > > +static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp 
> > > op) {
> > > +    VirtIOBlock *s = req->dev;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> > > +    BlockDriverState *bs = blk_bs(s->blk);
> > > +    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) * 512;
> > > +    uint64_t len;
> > > +    uint32_t type;
> > > +    uint8_t err_status = VIRTIO_BLK_S_OK;
> > > +
> > > +    if (!check_zoned_request(s, offset, 0, false, &err_status)) {
> > > +        goto out;
> > > +    }
> > > +
> > > +    ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData));
> > > +    data->req = req;
> > > +
> > > +    type = virtio_ldl_p(vdev, &req->out.type);
> > > +    if (type == VIRTIO_BLK_T_ZONE_RESET_ALL) {
> > > +        /* Entire drive capacity */
> > > +        offset = 0;
> > > +        len = bs->bl.capacity;
> > > +    } else {
> > > +        if (bs->bl.zone_size * bs->bl.nr_zones == bs->bl.capacity) {
> > > +            len = bs->bl.zone_size;
> > > +        } else {
> > > +            /* when the SWR drive has one last small zone, calculate its 
> > > len */
> > > +            len = bs->bl.capacity - bs->bl.zone_size * (bs->bl.nr_zones 
> > > - 1);
> > > +        }
> > > +        if (offset + len > bs->bl.capacity) {
> > > +            err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +            goto out;
> >
> > data is leaked here.
>
> I'll move the len computation before check_zoned_request so that data
> is not allocated until all checks have passed. That way nothing needs
> to be freed here.
>
> >
> > > +        }
> > > +    }
> > > +
> > > +    blk_aio_zone_mgmt(s->blk, op, offset, len,
> > > +                      virtio_blk_zone_mgmt_complete, data);
> > > +
> > > +    return 0;
> > > +out:
> > > +    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
> > > +    virtio_blk_req_complete(req, err_status);
> > > +    virtio_blk_free_request(req);
> > > +    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
> > > +    return err_status;
> > > +}
> > > +
> > > +static void virtio_blk_zone_append_complete(void *opaque, int ret) {
> > > +    ZoneCmdData *data = opaque;
> > > +    VirtIOBlockReq *req = data->req;
> > > +    VirtIOBlock *s = req->dev;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
> > > +    int64_t append_sector, n;
> > > +    struct iovec *out_iov = req->elem.out_sg;
> > > +    unsigned out_num = req->elem.out_num;
> > > +    uint8_t err_status = VIRTIO_BLK_S_OK;
> > > +
> > > +    if (ret) {
> > > +        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +        goto out;
> > > +    }
> > > +
> > > +    virtio_stl_p(vdev, &append_sector, 
> > > data->zone_append_data.append_sector);
> > > +    n = iov_to_buf(out_iov, out_num, 0, &append_sector, 
> > > sizeof(append_sector));
> >
> > out_iov contains the driver->device buffers. The device is only allowed
> > to read from out_iov, not write to it.
> >
> > The device->driver buffers are in in_iov.
> >
> > According to the spec the zone append in hdr looks like this:
> >
> >   struct {
> >       u8 status;
> >       u8 reserved[7];
> >       le64 append_sector;
> >   } virtio_blk_zone_append_inhdr;
> >
> > In virtio_blk_handle_request() we used iov_discard_back_undoable() to
> > take the last byte (the status field for non-zone append requests) from
> > in_iov[]. This is incorrect for zone append requests because they have
> > the larger struct zone_append_inhdr instead of struct
> > virtio_blk_inhdr.
> >
> > I think it might be time to stop using req->in in virtio-blk.c and
> > instead use iov_from_buf() to write the status byte. For zone append
> > requests we also need to write reserved[] and append_sector:
> >
> >   iov_discard_undo(&req->inhdr_undo);
> >   inhdr_len = is_zone_append ?
> >                sizeof(struct virtio_blk_zone_append_inhdr) :
> >                sizeof(struct virtio_blk_inhdr);
> >   iov_from_buf(req->elem.in_sg, req->elem.in_num,
> >                req->in_len - inhdr_len,
> >                &req->in, inhdr_len);
> >
> > where req->in changes to:
> >
> >   union {
> >       struct virtio_blk_inhdr inhdr;
> >       struct virtio_blk_zone_append_inhdr zone_append_inhdr;
> >   } in;
> >
> > Most requests will just use in.inhdr but zone append will fill out the
> > full in.zone_append_inhdr struct.
>
> I made some changes along those lines, but they didn't work for
> zone_append requests and broke other zonefs-tests too. I wonder how
> zone_append requests should fill in the status byte. For now I kept the
> original approach with virtio_blk_inhdr, because my attempts to remove
> it failed :)
>
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 1c2535bfeb..3ecabc7fb3 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -54,6 +54,12 @@ static const VirtIOFeature feature_sizes[] = {
>      {}
>  };
>
> +typedef struct virtio_blk_zone_append_inhdr {
> +    uint8_t status;
> +    uint8_t reserved[7];
> +    int64_t append_sector;
> +} virtio_blk_zone_append_inhdr;
> +
>  static void virtio_blk_set_config_size(VirtIOBlock *s, uint64_t 
> host_features)
>  {
>      s->config_size = MAX(VIRTIO_BLK_CFG_SIZE,
> @@ -82,11 +88,16 @@ static void virtio_blk_req_complete(VirtIOBlockReq
> *req, unsigned char status)
>  {
>      VirtIOBlock *s = req->dev;
>      VirtIODevice *vdev = VIRTIO_DEVICE(s);
> +    uint32_t type = virtio_ldl_p(vdev, &req->out.type);
>
>      trace_virtio_blk_req_complete(vdev, req, status);
>
> -    stb_p(&req->in->status, status);
>      iov_discard_undo(&req->inhdr_undo);
> +    size_t inhdr_len = (type == VIRTIO_BLK_T_ZONE_APPEND) ?
> +
> sizeof(virtio_blk_zone_append_inhdr):sizeof(struct virtio_blk_inhdr);
> +    iov_from_buf(req->elem.in_sg, req->elem.in_num,
> +                 req->in_len - inhdr_len, &req->in, inhdr_len);
> +    stb_p(&req->in->status, status);
> +
>      iov_discard_undo(&req->outhdr_undo);
>      virtqueue_push(req->vq, &req->elem, req->in_len);
>      if (s->dataplane_started && !s->dataplane_disabled) {
>
> >
> > > +    if (n != sizeof(append_sector)) {
> > > +        virtio_error(vdev, "Driver provided input buffer less than size 
> > > of "
> > > +                     "append_sector");
> > > +        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
> > > +        goto out;
> > > +    }
> > > +    goto out;
> > > +
> > > +out:
> > > +    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
> > > +    virtio_blk_req_complete(req, err_status);
> > > +    virtio_blk_free_request(req);
> > > +    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
> > > +    g_free(data);
> > > +}
> > > +
> > > +static int virtio_blk_handle_zone_append(VirtIOBlockReq *req) {
> > > +    VirtIOBlock *s = req->dev;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> > > +    uint64_t niov = req->elem.out_num;
> > > +    struct iovec *out_iov = req->elem.out_sg;
> > > +    uint8_t err_status = VIRTIO_BLK_S_OK;
> > > +
> > > +    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) * 512;
> > > +    int64_t len = 0;
> > > +    for (int i = 1; i < niov; ++i) {
> > > +        len += out_iov[i].iov_len;
> >
> > Please pass in out_iov and out_num instead of using req->elem.out_sg and
> > req->elem.out_num. virtio_blk_handle_request() modifies the iovecs
> > pointed to by req->elem.out_sg using iov_discard_front_undoable() and it
> > is not safe to access req->elem.out_sg directly.
> >
> > Also, VIRTIO devices are not allowed to make assumptions about the iovec
> > layout. That means skipping the first iovec in the for loop violates the
> > spec. The driver could send struct virtio_blk_req as two or more iovecs
> > instead of putting it into just 1 iovec. This is why the device is not
> > allowed to assume out_iov[0] is struct virtio_blk_req.

Even if VIRTIO devices can't make such assumptions, zone_append in the
block layer still has to make some assumption about struct
virtio_blk_req, because in most cases the iovecs containing the headers
are not aligned to the block size and zone_append writes need to skip
those iovecs. So the problem becomes how many iovecs it takes to contain
struct virtio_blk_req. Can we just skip the first N iovecs whose size is
less than, or not aligned to, one block size and write the following
iovecs?

> >
> > The for loop can be replaced with:
> >
> >   len = iov_size(out_iov, out_num);
> >
> > and out_iov[1]/niov-1 can be replaced with just out_iov and out_num (if
> > you pass them in from virtio_blk_handle_request()).
>
> Thanks!
>
> >
> > > +    }
> > > +
> > > +    if (!check_zoned_request(s, offset, len, true, &err_status)) {
> > > +        goto out;
> > > +    }
> > > +
> > > +    ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData));
> > > +    data->req = req;
> > > +    data->zone_append_data.append_sector = offset;
> > > +    qemu_iovec_init_external(&req->qiov, &out_iov[1], niov-1);
> > > +    blk_aio_zone_append(s->blk, &data->zone_append_data.append_sector, 
> > > &req->qiov, 0,
> > > +                        virtio_blk_zone_append_complete, data);
> > > +    return 0;
> > > +
> > > +out:
> > > +    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
> > > +    virtio_blk_req_complete(req, err_status);
> > > +    virtio_blk_free_request(req);
> > > +    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
> > > +    return err_status;
> > > +}
> > > +
> > >  static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer 
> > > *mrb)
> > >  {
> > >      uint32_t type;
> > > @@ -700,6 +1039,24 @@ static int virtio_blk_handle_request(VirtIOBlockReq 
> > > *req, MultiReqBuffer *mrb)
> > >      case VIRTIO_BLK_T_FLUSH:
> > >          virtio_blk_handle_flush(req, mrb);
> > >          break;
> > > +    case VIRTIO_BLK_T_ZONE_REPORT:
> > > +        virtio_blk_handle_zone_report(req);
> > > +        break;
> > > +    case VIRTIO_BLK_T_ZONE_OPEN:
> > > +        virtio_blk_handle_zone_mgmt(req, BLK_ZO_OPEN);
> > > +        break;
> > > +    case VIRTIO_BLK_T_ZONE_CLOSE:
> > > +        virtio_blk_handle_zone_mgmt(req, BLK_ZO_CLOSE);
> > > +        break;
> > > +    case VIRTIO_BLK_T_ZONE_FINISH:
> > > +        virtio_blk_handle_zone_mgmt(req, BLK_ZO_FINISH);
> > > +        break;
> > > +    case VIRTIO_BLK_T_ZONE_RESET:
> > > +        virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
> > > +        break;
> > > +    case VIRTIO_BLK_T_ZONE_RESET_ALL:
> > > +        virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET_ALL);
> > > +        break;
> > >      case VIRTIO_BLK_T_SCSI_CMD:
> > >          virtio_blk_handle_scsi(req);
> > >          break;
> > > @@ -718,6 +1075,9 @@ static int virtio_blk_handle_request(VirtIOBlockReq 
> > > *req, MultiReqBuffer *mrb)
> > >          virtio_blk_free_request(req);
> > >          break;
> > >      }
> > > +   case VIRTIO_BLK_T_ZONE_APPEND & ~VIRTIO_BLK_T_OUT:
> >
> > Indentation is off. QEMU uses 4-space indentation.
> >
> > > +       virtio_blk_handle_zone_append(req);
> > > +       break;
> > >      /*
> > >       * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined 
> > > with
> > >       * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch 
> > > statement,
> > > @@ -917,6 +1277,7 @@ static void virtio_blk_update_config(VirtIODevice 
> > > *vdev, uint8_t *config)
> > >  {
> > >      VirtIOBlock *s = VIRTIO_BLK(vdev);
> > >      BlockConf *conf = &s->conf.conf;
> > > +    BlockDriverState *bs = blk_bs(s->blk);
> > >      struct virtio_blk_config blkcfg;
> > >      uint64_t capacity;
> > >      int64_t length;
> > > @@ -976,6 +1337,30 @@ static void virtio_blk_update_config(VirtIODevice 
> > > *vdev, uint8_t *config)
> > >          blkcfg.write_zeroes_may_unmap = 1;
> > >          virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
> > >      }
> > > +    if (bs->bl.zoned != BLK_Z_NONE) {
> > > +        switch (bs->bl.zoned) {
> > > +        case BLK_Z_HM:
> > > +            blkcfg.zoned.model = VIRTIO_BLK_Z_HM;
> > > +            break;
> > > +        case BLK_Z_HA:
> > > +            blkcfg.zoned.model = VIRTIO_BLK_Z_HA;
> > > +            break;
> > > +        default:
> > > +            g_assert_not_reached();
> > > +        }
> > > +
> > > +        virtio_stl_p(vdev, &blkcfg.zoned.zone_sectors,
> > > +                     bs->bl.zone_size / 512);
> > > +        virtio_stl_p(vdev, &blkcfg.zoned.max_active_zones,
> > > +                     bs->bl.max_active_zones);
> > > +        virtio_stl_p(vdev, &blkcfg.zoned.max_open_zones,
> > > +                     bs->bl.max_open_zones);
> > > +        virtio_stl_p(vdev, &blkcfg.zoned.write_granularity, blk_size);
> > > +        virtio_stl_p(vdev, &blkcfg.zoned.max_append_sectors,
> > > +                     bs->bl.max_append_sectors);
> > > +    } else {
> > > +        blkcfg.zoned.model = VIRTIO_BLK_Z_NONE;
> > > +    }
> > >      memcpy(config, &blkcfg, s->config_size);
> > >  }
> > >
> > > @@ -1140,6 +1525,7 @@ static void virtio_blk_device_realize(DeviceState 
> > > *dev, Error **errp)
> > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > >      VirtIOBlock *s = VIRTIO_BLK(dev);
> > >      VirtIOBlkConf *conf = &s->conf;
> > > +    BlockDriverState *bs = blk_bs(conf->conf.blk);
> > >      Error *err = NULL;
> > >      unsigned i;
> > >
> > > @@ -1185,6 +1571,13 @@ static void virtio_blk_device_realize(DeviceState 
> > > *dev, Error **errp)
> > >          return;
> > >      }
> > >
> > > +    if (bs->bl.zoned != BLK_Z_NONE) {
> > > +        virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
> > > +        if (bs->bl.zoned == BLK_Z_HM) {
> > > +            virtio_clear_feature(&s->host_features, 
> > > VIRTIO_BLK_F_DISCARD);
> > > +        }
> > > +    }
> > > +
> > >      if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
> > >          (!conf->max_discard_sectors ||
> > >           conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
> > > --
> > > 2.37.3
> > >
[Prev in Thread]
Current Thread
[Next in Thread]
Re: [PATCH v2 2/2] virtio-blk: add zoned storage emulation for zoned devices, Stefan Hajnoczi, 2022/10/06
- Re: [PATCH v2 2/2] virtio-blk: add zoned storage emulation for zoned devices, Sam Li, 2022/10/08
  - Re: [PATCH v2 2/2] virtio-blk: add zoned storage emulation for zoned devices, Sam Li <=
Prev by Date: Re: [PATCH 04/11] hw/ppc/mpc8544ds: Add platform bus
Next by Date: Re: [PATCH v2 06/13] hw/ppc/mpc8544ds: Add platform bus
Previous by thread: Re: [PATCH v2 2/2] virtio-blk: add zoned storage emulation for zoned devices
Next by thread: [PATCH 00/18] tests/qtest: Enable running qtest on Windows
Index(es):
- Date
- Thread