qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v4 2/6] block: add .bdrv_co_write_zeroes() inter


From: Kevin Wolf
Subject: Re: [Qemu-devel] [PATCH v4 2/6] block: add .bdrv_co_write_zeroes() interface
Date: Tue, 24 Jan 2012 16:16:07 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:9.0) Gecko/20111222 Thunderbird/9.0

Am 18.01.2012 15:59, schrieb Stefan Hajnoczi:
> The ability to zero regions of an image file is a useful primitive for
> higher-level features such as image streaming or zero write detection.
> 
> Image formats may support an optimized metadata representation instead
> of writing zeroes into the image file.  This allows zero writes to be
> potentially faster than regular write operations and also preserve
> sparseness of the image file.
> 
> The .bdrv_co_write_zeroes() interface should be implemented by block
> drivers that wish to provide efficient zeroing.
> 
> Note that this operation is different from the discard operation, which
> may leave the contents of the region indeterminate.  That means
> discarded blocks are not guaranteed to contain zeroes and may contain
> junk data instead.
> 
> Signed-off-by: Stefan Hajnoczi <address@hidden>
> ---
>  block.c      |   53 +++++++++++++++++++++++++++++++++++++++++++++++------
>  block.h      |    7 +++++++
>  block_int.h  |    8 ++++++++
>  trace-events |    1 +
>  4 files changed, 63 insertions(+), 6 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 3621d11..c9fa5c1 100644
> --- a/block.c
> +++ b/block.c
> @@ -50,6 +50,7 @@
>  
>  typedef enum {
>      BDRV_REQ_COPY_ON_READ = 0x1,
> +    BDRV_REQ_ZERO_WRITE   = 0x2,
>  } BdrvRequestFlags;
>  
>  static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
> @@ -69,7 +70,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState 
> *bs,
>      int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
>      BdrvRequestFlags flags);
>  static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
> -    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> +    BdrvRequestFlags flags);
>  static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
>                                                 int64_t sector_num,
>                                                 QEMUIOVector *qiov,
> @@ -1300,7 +1302,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
>                                       rwco->nb_sectors, rwco->qiov, 0);
>      } else {
>          rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
> -                                      rwco->nb_sectors, rwco->qiov);
> +                                      rwco->nb_sectors, rwco->qiov, 0);
>      }
>  }
>  
> @@ -1639,11 +1641,37 @@ int coroutine_fn 
> bdrv_co_copy_on_readv(BlockDriverState *bs,
>                              BDRV_REQ_COPY_ON_READ);
>  }
>  
> +static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
> +    int64_t sector_num, int nb_sectors)
> +{
> +    BlockDriver *drv = bs->drv;
> +    QEMUIOVector qiov;
> +    struct iovec iov;
> +    int ret;
> +
> +    /* First try the efficient write zeroes operation */
> +    if (drv->bdrv_co_write_zeroes) {
> +        return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
> +    }
> +
> +    /* Fall back to bounce buffer if write zeroes is unsupported */
> +    iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
> +    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
> +    memset(iov.iov_base, 0, iov.iov_len);
> +    qemu_iovec_init_external(&qiov, &iov, 1);
> +
> +    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
> +
> +    qemu_vfree(iov.iov_base);
> +    return ret;
> +}
> +
>  /*
>   * Handle a write request in coroutine context
>   */
>  static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
> -    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
> +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> +    BdrvRequestFlags flags)
>  {
>      BlockDriver *drv = bs->drv;
>      BdrvTrackedRequest req;
> @@ -1670,7 +1698,11 @@ static int coroutine_fn 
> bdrv_co_do_writev(BlockDriverState *bs,
>  
>      tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
>  
> -    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
> +    if (flags & BDRV_REQ_ZERO_WRITE) {
> +        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
> +    } else {
> +        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
> +    }
>  
>      if (bs->dirty_bitmap) {
>          set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
> @@ -1690,7 +1722,16 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, 
> int64_t sector_num,
>  {
>      trace_bdrv_co_writev(bs, sector_num, nb_sectors);
>  
> -    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
> +    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
> +}
> +
> +int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
> +                                      int64_t sector_num, int nb_sectors)
> +{
> +    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
> +
> +    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
> +                             BDRV_REQ_ZERO_WRITE);
>  }
>  
>  /**
> @@ -3192,7 +3233,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
>              acb->req.nb_sectors, acb->req.qiov, 0);
>      } else {
>          acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
> -            acb->req.nb_sectors, acb->req.qiov);
> +            acb->req.nb_sectors, acb->req.qiov, 0);
>      }
>  
>      acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
> diff --git a/block.h b/block.h
> index cae289b..9f3aad3 100644
> --- a/block.h
> +++ b/block.h
> @@ -146,6 +146,13 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState 
> *bs,
>      int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
>  int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
>      int nb_sectors, QEMUIOVector *qiov);
> +/*
> + * Efficiently zero a region of the disk image.  Note that this is a regular
> + * I/O request like read or write and should have a reasonable size.  This
> + * function is not suitable for zeroing the entire image in a single request.
> + */

The reason for this is that in the fallback case you allocate memory for
the whole request, right? So what about just limiting the allocation in
bdrv_co_do_write_zeroes() to a reasonable size and putting a loop there
for large requests?

Kevin



reply via email to

[Prev in Thread] Current Thread [Next in Thread]