[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 40/47] mirror: perform COW if the cluster size is bigger than the granularity
From: |
Paolo Bonzini |
Subject: |
[Qemu-devel] [PATCH 40/47] mirror: perform COW if the cluster size is bigger than the granularity |
Date: |
Tue, 24 Jul 2012 13:04:18 +0200 |
When mirroring runs, the backing files for the target may not yet be
ready. This means that a copy-on-write operation on the target would
fill the missing sectors with zeros rather than with the backing data. Copy-on-write only happens
if the granularity of the dirty bitmap is smaller than the cluster size
(and only for clusters that are allocated in the source after the job
has started copying). So far, the granularity was fixed to 1MB; to avoid
the problem we detected the situation and required the backing files to
be available in that case only.
However, we want to lower the granularity for efficiency, so we need
a better solution. The solution is to always copy a whole cluster the
first time it is touched. The code keeps a bitmap of clusters that
have already been allocated by the mirroring job, and only does "manual"
copy-on-write if the chunk being copied is zero in the bitmap.
Signed-off-by: Paolo Bonzini <address@hidden>
---
block/mirror.c | 60 ++++++++++++++++++++++++++++++++++++++++--------
blockdev.c | 13 +----------
tests/qemu-iotests/039 | 25 ++++++++++++++++++--
trace-events | 1 +
4 files changed, 76 insertions(+), 23 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index c3340d1..6f8ae62 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -15,6 +15,7 @@
#include "blockjob.h"
#include "block_int.h"
#include "qemu/ratelimit.h"
+#include "bitmap.h"
enum {
/*
@@ -36,6 +37,8 @@ typedef struct MirrorBlockJob {
bool synced;
bool complete;
int64_t sector_num;
+ size_t buf_size;
+ unsigned long *cow_bitmap;
HBitmapIter hbi;
uint8_t *buf;
} MirrorBlockJob;
@@ -47,7 +50,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
BlockDriverState *target = s->target;
QEMUIOVector qiov;
int ret, nb_sectors;
- int64_t end;
+ int64_t end, sector_num, cluster_num;
struct iovec iov;
s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -58,24 +61,43 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
assert(s->sector_num >= 0);
}
+ /* If we have no backing file yet in the destination, and the cluster size
+ * is very large, we need to do COW ourselves. The first time a cluster is
+ * copied, copy it entirely.
+ *
+ * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
+ * powers of two, the number of sectors to copy cannot exceed one cluster.
+ */
+ sector_num = s->sector_num;
+ nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
+ cluster_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+ if (s->cow_bitmap && !test_bit(cluster_num, s->cow_bitmap)) {
+ trace_mirror_cow(s, sector_num);
+ bdrv_round_to_clusters(s->target,
+ sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
+ &sector_num, &nb_sectors);
+ bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
+ nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
+ }
+
end = s->common.len >> BDRV_SECTOR_BITS;
- nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
- bdrv_reset_dirty(source, s->sector_num, nb_sectors);
+ nb_sectors = MIN(nb_sectors, end - sector_num);
+ bdrv_reset_dirty(source, sector_num, nb_sectors);
/* Copy the dirty cluster. */
iov.iov_base = s->buf;
iov.iov_len = nb_sectors * 512;
qemu_iovec_init_external(&qiov, &iov, 1);
- trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
- ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
+ trace_mirror_one_iteration(s, sector_num, nb_sectors);
+ ret = bdrv_co_readv(source, sector_num, nb_sectors, &qiov);
if (ret < 0) {
*p_action = block_job_error_action(&s->common, source,
s->on_source_error, true, -ret);
s->synced = false;
goto fail;
}
- ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+ ret = bdrv_co_writev(target, sector_num, nb_sectors, &qiov);
if (ret < 0) {
*p_action = block_job_error_action(&s->common, target,
s->on_target_error, false, -ret);
@@ -86,7 +108,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
fail:
/* Try again later. */
- bdrv_set_dirty(source, s->sector_num, nb_sectors);
+ bdrv_set_dirty(source, sector_num, nb_sectors);
return ret;
}
@@ -94,7 +116,9 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
BlockDriverState *bs = s->common.bs;
- int64_t sector_num, end;
+ int64_t sector_num, end, length;
+ BlockDriverInfo bdi;
+ char backing_filename[1024];
int ret = 0;
int n;
@@ -108,8 +132,23 @@ static void coroutine_fn mirror_run(void *opaque)
return;
}
+ /* If we have no backing file yet in the destination, we cannot let
+ * the destination do COW. Instead, we copy sectors around the
+ * dirty data if needed. We need a bitmap to do that.
+ */
+ bdrv_get_backing_filename(s->target, backing_filename,
+ sizeof(backing_filename));
+ if (backing_filename[0] && !s->target->backing_hd) {
+ bdrv_get_info(s->target, &bdi);
+ if (s->buf_size < bdi.cluster_size) {
+ s->buf_size = bdi.cluster_size;
+ length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
+ s->cow_bitmap = bitmap_new(length);
+ }
+ }
+
end = s->common.len >> BDRV_SECTOR_BITS;
- s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+ s->buf = qemu_blockalign(bs, s->buf_size);
if (s->mode == MIRROR_SYNC_MODE_FULL || s->mode == MIRROR_SYNC_MODE_TOP) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
@@ -218,6 +257,7 @@ static void coroutine_fn mirror_run(void *opaque)
immediate_exit:
g_free(s->buf);
+ g_free(s->cow_bitmap);
bdrv_set_dirty_tracking(bs, false);
bdrv_iostatus_disable(s->target);
if (s->complete && ret == 0) {
@@ -313,6 +353,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState
*target,
s->on_target_error = on_target_error;
s->target = target;
s->mode = mode;
+ s->buf_size = BLOCK_SIZE;
+
bdrv_set_dirty_tracking(bs, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
diff --git a/blockdev.c b/blockdev.c
index eb528cd..e160610 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -836,7 +836,6 @@ void qmp_drive_mirror(const char *device, const char
*target,
bool has_on_target_error, BlockdevOnError
on_target_error,
Error **errp)
{
- BlockDriverInfo bdi;
BlockDriverState *bs;
BlockDriverState *source, *target_bs;
BlockDriver *proto_drv;
@@ -927,6 +926,7 @@ void qmp_drive_mirror(const char *device, const char
*target,
return;
}
+ /* Mirroring takes care of copy-on-write using data from the source. */
target_bs = bdrv_new("");
ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
@@ -936,17 +936,6 @@ void qmp_drive_mirror(const char *device, const char
*target,
return;
}
- /* We need a backing file if we will copy parts of a cluster. */
- if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 &&
- bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) {
- ret = bdrv_ensure_backing_file(target_bs);
- if (ret < 0) {
- bdrv_delete(target_bs);
- error_set(errp, QERR_OPEN_FILE_FAILED, target);
- return;
- }
- }
-
mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index 3e17881..17fa05f 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -195,8 +195,8 @@ class TestSingleDrive(ImageMirroringTestCase):
def test_large_cluster(self):
self.assert_no_active_mirrors()
- qemu_img('create', '-f', iotests.imgfmt, '-o',
'cluster_size=%d,backing_file=%s'
- % (TestSingleDrive.image_len, mid_img), target_img)
+ qemu_img('create', '-f', iotests.imgfmt, '-o',
'cluster_size=%d,size=%d'
+ % (TestSingleDrive.image_len,
TestSingleDrive.image_len), target_img)
result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
mode='existing', target=target_img)
self.assert_qmp(result, 'return', {})
@@ -280,6 +280,27 @@ class TestMirrorNoBacking(ImageMirroringTestCase):
self.assertTrue(self.compare_images(test_img, target_img),
'target image does not match source after mirroring')
+ def test_large_cluster(self):
+ self.assert_no_active_mirrors()
+
+ # qemu-img create fails if the image is not there
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'size=%d'
+ %(TestMirrorNoBacking.image_len), target_backing_img)
+ qemu_img('create', '-f', iotests.imgfmt, '-o',
'cluster_size=%d,backing_file=%s'
+ % (TestMirrorNoBacking.image_len, target_backing_img),
target_img)
+ os.remove(target_backing_img)
+
+ result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+ mode='existing', target=target_img)
+ self.assert_qmp(result, 'return', {})
+
+ self.complete_and_wait()
+ result = self.vm.qmp('query-block')
+ self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+ self.vm.shutdown()
+ self.assertTrue(self.compare_images(test_img, target_img),
+ 'target image does not match source after mirroring')
+
class TestReadErrors(ImageMirroringTestCase):
image_len = 2 * 1024 * 1024 # MB
diff --git a/trace-events b/trace-events
index 496824c..6b504d8 100644
--- a/trace-events
+++ b/trace-events
@@ -82,6 +82,7 @@ mirror_before_flush(void *s) "s %p"
mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
+mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
# blockdev.c
qmp_block_job_cancel(void *job) "job %p"
--
1.7.10.4
- Re: [Qemu-devel] [PATCH 31/47] qemu-iotests: add mirroring test case, (continued)
- [Qemu-devel] [PATCH 33/47] mirror: add support for on-source-error/on-target-error, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 32/47] block: forward bdrv_iostatus_reset to block job, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 34/47] qmp: add pull_event function, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 37/47] add hierarchical bitmap data type and test cases, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 39/47] block: make round_to_clusters public, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 40/47] mirror: perform COW if the cluster size is bigger than the granularity,
Paolo Bonzini <=
- [Qemu-devel] [PATCH 46/47] mirror: support more than one in-flight AIO operation, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 47/47] mirror: support arbitrarily-sized iterations, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 41/47] block: return count of dirty sectors, not chunks, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 36/47] host-utils: add ffsl and flsl, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 23/47] block: add target info to QMP query-blockjobs command, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 42/47] block: allow customizing the granularity of the dirty bitmap, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 17/47] qemu-iotests: add tests for streaming error handling, Paolo Bonzini, 2012/07/24
- [Qemu-devel] [PATCH 18/47] block: live snapshot documentation tweaks, Paolo Bonzini, 2012/07/24