[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 02/15] block: add .bdrv_co_write_zeroes() interface
From: |
Kevin Wolf |
Subject: |
[Qemu-devel] [PATCH 02/15] block: add .bdrv_co_write_zeroes() interface |
Date: |
Fri, 10 Feb 2012 13:47:31 +0100 |
From: Stefan Hajnoczi <address@hidden>
The ability to zero regions of an image file is a useful primitive for
higher-level features such as image streaming or zero write detection.
Image formats may support an optimized metadata representation instead
of writing zeroes into the image file. This allows zero writes to be
potentially faster than regular write operations and also preserve
sparseness of the image file.
The .bdrv_co_write_zeroes() interface should be implemented by block
drivers that wish to provide efficient zeroing.
Note that this operation is different from the discard operation, which
may leave the contents of the region indeterminate. That means
discarded blocks are not guaranteed to contain zeroes and may contain
junk data instead.
Signed-off-by: Stefan Hajnoczi <address@hidden>
Signed-off-by: Kevin Wolf <address@hidden>
---
block.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------
block.h | 8 ++++++++
block_int.h | 8 ++++++++
trace-events | 1 +
4 files changed, 64 insertions(+), 6 deletions(-)
diff --git a/block.c b/block.c
index 3621d11..c9fa5c1 100644
--- a/block.c
+++ b/block.c
@@ -50,6 +50,7 @@
typedef enum {
BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
} BdrvRequestFlags;
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
@@ -69,7 +70,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -1300,7 +1302,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
rwco->nb_sectors, rwco->qiov, 0);
} else {
rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov);
+ rwco->nb_sectors, rwco->qiov, 0);
}
}
@@ -1639,11 +1641,37 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState
*bs,
BDRV_REQ_COPY_ON_READ);
}
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ BlockDriver *drv = bs->drv;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ int ret;
+
+ /* First try the efficient write zeroes operation */
+ if (drv->bdrv_co_write_zeroes) {
+ return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ }
+
+ /* Fall back to bounce buffer if write zeroes is unsupported */
+ iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+ memset(iov.iov_base, 0, iov.iov_len);
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+
+ qemu_vfree(iov.iov_base);
+ return ret;
+}
+
/*
* Handle a write request in coroutine context
*/
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
@@ -1670,7 +1698,11 @@ static int coroutine_fn
bdrv_co_do_writev(BlockDriverState *bs,
tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ if (flags & BDRV_REQ_ZERO_WRITE) {
+ ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ } else {
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ }
if (bs->dirty_bitmap) {
set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
@@ -1690,7 +1722,16 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs,
int64_t sector_num,
{
trace_bdrv_co_writev(bs, sector_num, nb_sectors);
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
+ return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
+ BDRV_REQ_ZERO_WRITE);
}
/**
@@ -3192,7 +3233,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
acb->req.nb_sectors, acb->req.qiov, 0);
} else {
acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov);
+ acb->req.nb_sectors, acb->req.qiov, 0);
}
acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
diff --git a/block.h b/block.h
index cae289b..60ea730 100644
--- a/block.h
+++ b/block.h
@@ -146,6 +146,14 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState
*bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
diff --git a/block_int.h b/block_int.h
index 7be2988..7946cf6 100644
--- a/block_int.h
+++ b/block_int.h
@@ -131,6 +131,14 @@ struct BlockDriver {
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL and .bdrv_co_writev() will be called
+ * instead.
+ */
+ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
diff --git a/trace-events b/trace-events
index 5d05749..3372087 100644
--- a/trace-events
+++ b/trace-events
@@ -67,6 +67,7 @@ bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num
%"PRId64" nb_sectors %d"
bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p
sector_num %"PRId64" nb_sectors %d"
bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num
%"PRId64" nb_sectors %d"
+bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector) "bs %p
sector_num %"PRId64" nb_sectors %d"
bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void
*acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p"
bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t
cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64"
nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
--
1.7.6.5
- [Qemu-devel] [PULL 00/15] Block patches, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 01/15] cutils: extract buffer_is_zero() from qemu-img.c, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 02/15] block: add .bdrv_co_write_zeroes() interface,
Kevin Wolf <=
- [Qemu-devel] [PATCH 03/15] block: perform zero-detection during copy-on-read, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 04/15] qed: replace is_write with flags field, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 05/15] qed: add .bdrv_co_write_zeroes() support, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 06/15] qemu-io: add write -z option for bdrv_co_write_zeroes, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 14/15] sheepdog: fix co_recv coroutine context, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 15/15] AHCI: Masking of IRQs actually masks them, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 09/15] vpc: Round up image size during fixed image creation, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 11/15] qcow2: Keep unknown header extension when rewriting header, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 12/15] rewrite QEMU_BUILD_BUG_ON, Kevin Wolf, 2012/02/10
- [Qemu-devel] [PATCH 10/15] qcow2: Update whole header at once, Kevin Wolf, 2012/02/10