[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v2 2/3] qed: add zero write detection support
From: |
Stefan Hajnoczi |
Subject: |
[Qemu-devel] [PATCH v2 2/3] qed: add zero write detection support |
Date: |
Wed, 7 Dec 2011 12:10:58 +0000 |
The QED image format is able to efficiently represent clusters
containing zeroes with a magic offset value. This patch implements zero
write detection for allocating writes so that image streaming can copy
over zero clusters from a backing file without expanding the image file
unnecessarily.
This is based on code by Anthony Liguori <address@hidden>.
Signed-off-by: Stefan Hajnoczi <address@hidden>
---
block/qed.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
1 files changed, 72 insertions(+), 8 deletions(-)
diff --git a/block/qed.c b/block/qed.c
index 8da3ebe..db4246a 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -941,9 +941,8 @@ static void qed_aio_write_l1_update(void *opaque, int ret)
/**
* Update L2 table with new cluster offsets and write them out
*/
-static void qed_aio_write_l2_update(void *opaque, int ret)
+static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
{
- QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
int index;
@@ -959,7 +958,7 @@ static void qed_aio_write_l2_update(void *opaque, int ret)
index = qed_l2_index(s, acb->cur_pos);
qed_update_l2_table(s, acb->request.l2_table->table, index,
acb->cur_nclusters,
- acb->cur_cluster);
+ offset);
if (need_alloc) {
/* Write out the whole new L2 table */
@@ -976,6 +975,51 @@ err:
qed_aio_complete(acb, ret);
}
+static void qed_aio_write_l2_update_cb(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
+}
+
+/**
+ * Determine if we have a zero write to a block of clusters
+ *
+ * We validate that the write is aligned to a cluster boundary, and that it's
+ * a multiple of cluster size with all zeros.
+ */
+static bool qed_is_zero_write(QEDAIOCB *acb)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+ int i;
+
+ if (!qed_offset_is_cluster_aligned(s, acb->cur_pos)) {
+ return false;
+ }
+
+ if (!qed_offset_is_cluster_aligned(s, acb->cur_qiov.size)) {
+ return false;
+ }
+
+ for (i = 0; i < acb->cur_qiov.niov; i++) {
+ struct iovec *iov = &acb->cur_qiov.iov[i];
+ uint64_t *v;
+ int j;
+
+ if ((iov->iov_len & 0x07)) {
+ return false;
+ }
+
+ v = iov->iov_base;
+ for (j = 0; j < iov->iov_len; j += sizeof(v[0])) {
+ if (v[j >> 3]) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
/**
* Flush new data clusters before updating the L2 table
*
@@ -990,7 +1034,7 @@ static void qed_aio_write_flush_before_l2_update(void
*opaque, int ret)
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update, opaque)) {
+ if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
qed_aio_complete(acb, -EIO);
}
}
@@ -1019,7 +1063,7 @@ static void qed_aio_write_main(void *opaque, int ret)
if (s->bs->backing_hd) {
next_fn = qed_aio_write_flush_before_l2_update;
} else {
- next_fn = qed_aio_write_l2_update;
+ next_fn = qed_aio_write_l2_update_cb;
}
}
@@ -1081,6 +1125,18 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
return !(s->header.features & QED_F_NEED_CHECK);
}
+static void qed_aio_write_zero_cluster(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ qed_aio_write_l2_update(acb, 0, 1);
+}
+
/**
* Write new data cluster
*
@@ -1092,6 +1148,7 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
{
BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverCompletionFunc *cb;
/* Cancel timer when the first allocating request comes in */
if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
@@ -1109,14 +1166,21 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t
len)
acb->cur_nclusters = qed_bytes_to_clusters(s,
qed_offset_into_cluster(s, acb->cur_pos) + len);
- acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+ /* Zero write detection */
+ if (s->bs->zero_detection && qed_is_zero_write(acb)) {
+ cb = qed_aio_write_zero_cluster;
+ } else {
+ cb = qed_aio_write_prefill;
+ acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
+ }
+
if (qed_should_set_need_check(s)) {
s->header.features |= QED_F_NEED_CHECK;
- qed_write_header(s, qed_aio_write_prefill, acb);
+ qed_write_header(s, cb, acb);
} else {
- qed_aio_write_prefill(acb, 0);
+ cb(acb, 0);
}
}
--
1.7.7.3