[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 11/26] FVD: add impl of interface bdrv_aio_writev()
From: |
Chunqiang Tang |
Subject: |
[Qemu-devel] [PATCH 11/26] FVD: add impl of interface bdrv_aio_writev() |
Date: |
Fri, 25 Feb 2011 17:37:51 -0500 |
This patch is part of the Fast Virtual Disk (FVD) proposal.
See http://wiki.qemu.org/Features/FVD.
This patch adds FVD's implementation of the bdrv_aio_writev() interface. It
supports copy-on-write in FVD.
Signed-off-by: Chunqiang Tang <address@hidden>
---
block/fvd-bitmap.c | 150 ++++++++++++++++
block/fvd-journal.c | 4 +
block/fvd-store.c | 20 +++
block/fvd-write.c | 468 ++++++++++++++++++++++++++++++++++++++++++++++++++-
block/fvd.c | 4 +-
block/fvd.h | 1 +
6 files changed, 645 insertions(+), 2 deletions(-)
create mode 100644 block/fvd-bitmap.c
create mode 100644 block/fvd-store.c
diff --git a/block/fvd-bitmap.c b/block/fvd-bitmap.c
new file mode 100644
index 0000000..7e96201
--- /dev/null
+++ b/block/fvd-bitmap.c
@@ -0,0 +1,150 @@
+/*
+ * QEMU Fast Virtual Disk Format Utility Functions for Bitmap
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * Look up, in s->stale_bitmap, the bit of the block that contains sector_num.
+ *
+ * Returns true when that bit is clear. Returns false when the bit is set, or
+ * when sector_num is at or beyond s->base_img_sectors (the bitmap is not
+ * consulted in that case).
+ */
+static inline bool stale_bitmap_show_sector_in_base_img(int64_t sector_num,
+                                                const BDRVFvdState * s)
+{
+    if (sector_num < s->base_img_sectors) {
+        const int64_t blk = sector_num / s->block_size;
+        const uint8_t shift = blk % 8;
+        const uint8_t bits = s->stale_bitmap[blk / 8];
+
+        return ((bits >> shift) & 0x01) == 0;
+    }
+
+    return false;
+}
+
+/*
+ * Look up, in s->fresh_bitmap, the bit of the block that contains sector_num.
+ *
+ * Returns true when that bit is clear. Returns false when the bit is set, or
+ * when sector_num is at or beyond s->base_img_sectors (the bitmap is not
+ * consulted in that case).
+ */
+static inline bool fresh_bitmap_show_sector_in_base_img(int64_t sector_num,
+                                                const BDRVFvdState * s)
+{
+    const bool beyond_base_img = sector_num >= s->base_img_sectors;
+
+    if (beyond_base_img) {
+        return false;
+    }
+
+    const int64_t blk = sector_num / s->block_size;
+    const uint8_t shift = blk % 8;
+    const uint8_t bits = s->fresh_bitmap[blk / 8];
+
+    return ((bits >> shift) & 0x01) == 0;
+}
+
+/*
+ * Set, in s->fresh_bitmap, the bit of every block touched by the sector range
+ * [sector_num, sector_num + nb_sectors). The range is clipped to
+ * s->base_img_sectors, and nothing is done when it starts at or beyond that
+ * limit. Bytes whose bit is already set are not written again.
+ */
+static inline void update_fresh_bitmap(int64_t sector_num, int nb_sectors,
+                                       const BDRVFvdState * s)
+{
+    if (sector_num >= s->base_img_sectors) {
+        return;
+    }
+
+    int64_t limit = sector_num + nb_sectors;
+    if (limit > s->base_img_sectors) {
+        limit = s->base_img_sectors;
+    }
+
+    const int64_t last_blk = (limit - 1) / s->block_size;
+    int64_t blk;
+
+    for (blk = sector_num / s->block_size; blk <= last_blk; blk++) {
+        const int64_t byte_idx = blk / 8;
+        const uint8_t shift = blk % 8;
+        const uint8_t mask = (uint8_t) (0x01 << shift);
+        uint8_t bits = s->fresh_bitmap[byte_idx];
+
+        if ((bits & mask) == 0) {
+            bits |= mask;
+            s->fresh_bitmap[byte_idx] = bits;
+        }
+    }
+}
+
+/*
+ * Same lookup as the stale/fresh variants, but against a caller-supplied
+ * bitmap buffer. bitmap_offset is subtracted from the byte index before
+ * 'bitmap' is indexed.
+ *
+ * Returns true when the bit of the block containing sector_num is clear.
+ * Returns false when it is set, or when sector_num is at or beyond
+ * s->base_img_sectors.
+ */
+static inline bool bitmap_show_sector_in_base_img(int64_t sector_num,
+                                                  const BDRVFvdState * s,
+                                                  int bitmap_offset,
+                                                  uint8_t * bitmap)
+{
+    if (sector_num >= s->base_img_sectors) {
+        return false;
+    }
+
+    const int64_t blk = sector_num / s->block_size;
+    const int64_t byte_idx = blk / 8 - bitmap_offset;
+    const uint8_t shift = blk % 8;
+    const uint8_t bits = bitmap[byte_idx];
+
+    return ((bits >> shift) & 0x01) == 0;
+}
+
+/*
+ * Check whether the request described by acb touches at least one block
+ * whose bit is still clear in s->stale_bitmap.
+ *
+ * The sector range [acb->sector_num, acb->sector_num + acb->nb_sectors) is
+ * clipped to s->base_img_sectors before it is converted to block numbers.
+ *
+ * Returns true if such a block exists, false otherwise.
+ */
+static inline bool stale_bitmap_need_update(FvdAIOCB * acb)
+{
+    BlockDriverState *bs = acb->common.bs;
+    BDRVFvdState *s = bs->opaque;
+
+    /* A request that starts at or beyond base_img_sectors touches no block
+     * tracked by the bitmap. The other bitmap helpers in this file bail out
+     * the same way. Without this guard the clamped 'end' below can still
+     * yield block_end >= block_num (e.g. when base_img_sectors is 0 or not
+     * block-aligned), and the loop would then read a bit that has nothing to
+     * do with the request. */
+    if (acb->sector_num >= s->base_img_sectors) {
+        return false;
+    }
+
+    int64_t end = acb->sector_num + acb->nb_sectors;
+    if (end > s->base_img_sectors) {
+        end = s->base_img_sectors;
+    }
+
+    int64_t block_end = (end - 1) / s->block_size;
+    int64_t block_num = acb->sector_num / s->block_size;
+
+    for (; block_num <= block_end; block_num++) {
+        int64_t bitmap_byte_offset = block_num / 8;
+        uint8_t bitmap_bit_offset = block_num % 8;
+        uint8_t mask = (uint8_t) (0x01 << bitmap_bit_offset);
+        uint8_t b = s->stale_bitmap[bitmap_byte_offset];
+
+        if (!(b & mask)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/*
+ * For every block touched by acb's sector range (clipped to
+ * s->base_img_sectors) whose bit is clear in s->stale_bitmap, make sure the
+ * matching bit is set in s->fresh_bitmap.
+ *
+ * Returns true if stale_bitmap needs update, i.e. if at least one touched
+ * block had its stale_bitmap bit clear. Returns false without touching
+ * either bitmap when the request starts at or beyond s->base_img_sectors.
+ */
+static bool update_fresh_bitmap_and_check_stale_bitmap(FvdAIOCB * acb)
+{
+    BDRVFvdState *s = acb->common.bs->opaque;
+
+    if (acb->sector_num >= s->base_img_sectors) {
+        return false;
+    }
+
+    int64_t limit = acb->sector_num + acb->nb_sectors;
+    if (limit > s->base_img_sectors) {
+        limit = s->base_img_sectors;
+    }
+
+    const int64_t last_blk = (limit - 1) / s->block_size;
+    bool stale_is_behind = false;
+    int64_t blk;
+
+    for (blk = acb->sector_num / s->block_size; blk <= last_blk; blk++) {
+        const int64_t byte_idx = blk / 8;
+        const uint8_t shift = blk % 8;
+        const uint8_t mask = (uint8_t) (0x01 << shift);
+
+        /* A bit that is set in stale_bitmap must already be set in
+         * fresh_bitmap, so only blocks with a clear stale bit need work. */
+        if ((s->stale_bitmap[byte_idx] & mask) == 0) {
+            stale_is_behind = true;
+
+            uint8_t fresh = s->fresh_bitmap[byte_idx];
+            if ((fresh & mask) == 0) {
+                fresh |= mask;
+                s->fresh_bitmap[byte_idx] = fresh;
+            }
+        }
+    }
+
+    return stale_is_behind;
+}
diff --git a/block/fvd-journal.c b/block/fvd-journal.c
index 5ba34bd..2edfc70 100644
--- a/block/fvd-journal.c
+++ b/block/fvd-journal.c
@@ -28,6 +28,10 @@ static int init_journal(int read_only, BlockDriverState * bs,
return -ENOTSUP;
}
+/* Placeholder: the body is empty in this patch, so both arguments are
+ * ignored and the call has no effect. */
+static void write_metadata_to_journal(struct FvdAIOCB *acb, bool update_bitmap)
+{
+}
+
void fvd_emulate_host_crash(bool cond)
{
emulate_host_crash = cond;
diff --git a/block/fvd-store.c b/block/fvd-store.c
new file mode 100644
index 0000000..85e45d4
--- /dev/null
+++ b/block/fvd-store.c
@@ -0,0 +1,20 @@
+/*
+ * QEMU Fast Virtual Disk Format Store Data in Compact Image
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/* Placeholder: in this patch no I/O is started, every argument is ignored and
+ * NULL is always returned. The callers in fvd-write.c treat a NULL return as
+ * a failure to start the request. */
+static inline BlockDriverAIOCB *store_data(int soft_write,
+ FvdAIOCB * parent_acb, BlockDriverState * bs,
+ int64_t sector_num, QEMUIOVector * orig_qiov, int nb_sectors,
+ BlockDriverCompletionFunc * cb, void *opaque)
+{
+ return NULL;
+}
diff --git a/block/fvd-write.c b/block/fvd-write.c
index a736a37..f0580d4 100644
--- a/block/fvd-write.c
+++ b/block/fvd-write.c
@@ -11,11 +11,477 @@
*
*/
+static void write_metadata_to_journal(struct FvdAIOCB *acb,
+                                      bool update_bitmap);
+static int do_aio_write(struct FvdAIOCB *acb);
+static void restart_dependent_writes(struct FvdAIOCB *acb);
+static void free_write_resource(struct FvdAIOCB *acb);
+static inline BlockDriverAIOCB *store_data(int soft_write,
+ FvdAIOCB * parent_acb, BlockDriverState * bs,
+ int64_t sector_num, QEMUIOVector * orig_qiov, int nb_sectors,
+ BlockDriverCompletionFunc * cb, void *opaque);
+
+/*
+ * Resize s->fvd_data to s->data_offset * 512 + s->virtual_disk_size bytes and
+ * mark the data region as prepared.
+ *
+ * NOTE(review): the result of bdrv_truncate() is not checked, so the flag is
+ * set even if the resize did not happen -- confirm this is intended.
+ */
+static inline void init_data_region(BDRVFvdState * s)
+{
+    const int64_t data_file_size = s->data_offset * 512 + s->virtual_disk_size;
+
+    bdrv_truncate(s->fvd_data, data_file_size);
+    s->data_region_prepared = true;
+}
+
+/*
+ * Start an asynchronous vectored write of nb_sectors sectors at sector_num.
+ *
+ * Returns NULL when s->metadata_err_prohibit_write is set, when no FvdAIOCB
+ * can be obtained, or when do_aio_write() fails. Requests that need no bitmap
+ * handling are handed straight to store_data() and its return value is
+ * passed back; every other request returns the 'common' member of a newly
+ * set up FvdAIOCB.
+ */
static BlockDriverAIOCB *fvd_aio_writev(BlockDriverState * bs,
int64_t sector_num,
QEMUIOVector * qiov, int nb_sectors,
BlockDriverCompletionFunc * cb,
void *opaque)
{
- return NULL;
+ BDRVFvdState *s = bs->opaque;
+ FvdAIOCB *acb;
+
+ TRACE_REQUEST(true, sector_num, nb_sectors);
+
+ if (s->metadata_err_prohibit_write) {
+ return NULL;
+ }
+
+ /* Prepare the data region once, on the first write that gets this far. */
+ if (!s->data_region_prepared) {
+ init_data_region(s);
+ }
+
+ if (s->prefetch_state == PREFETCH_STATE_FINISHED
+ || sector_num >= s->base_img_sectors) {
+ /* This is an efficient case. See Section 3.3.5 of the FVD-cow paper.
+ * This also covers the case of no base image. */
+ return store_data(false, NULL, bs, sector_num, qiov,
+ nb_sectors, cb, opaque);
+ }
+
+ /* Check if all requested sectors are in the FVD data file. */
+ int64_t sec = ROUND_DOWN(sector_num, s->block_size);
+ int64_t sec_in_last_block = ROUND_DOWN(sector_num + nb_sectors - 1,
+ s->block_size);
+ /* Probe the stale bitmap once per block covered by the request; a single
+ * block still marked as being in the base image forces the slow path. */
+ do {
+ if (stale_bitmap_show_sector_in_base_img(sec, s)) {
+ goto slow_path;
+ }
+ sec += s->block_size;
+ } while (sec <= sec_in_last_block);
+
+ /* This is the fast path, as all requested data are in the FVD data file
+ * and no need to update the bitmap. */
+ return store_data(false, NULL, bs, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+slow_path:
+ acb = my_qemu_aio_get(&fvd_aio_pool, bs, cb, opaque);
+ if (!acb) {
+ return NULL;
+ }
+
+ acb->type = OP_WRITE;
+ acb->cancel_in_progress = false;
+ acb->sector_num = sector_num;
+ acb->nb_sectors = nb_sectors;
+ acb->write.ret = 0;
+ acb->write.update_table = false;
+ acb->write.qiov = qiov;
+ acb->write.hd_acb = NULL;
+ acb->write.cow_buf = NULL;
+ /* A NULL le_prev marks the acb as not linked on the corresponding list;
+ * free_write_resource() tests these fields before unlinking. */
+ acb->copy_lock.next.le_prev = NULL;
+ acb->write.next_write_lock.le_prev = NULL;
+ acb->write.next_dependent_write.le_prev = NULL;
+ acb->jcb.iov.iov_base = NULL;
+ acb->jcb.hd_acb = NULL;
+ acb->jcb.ujnl_next_wait4_recycle.le_prev = NULL;
+ QLIST_INIT(&acb->copy_lock.dependent_writes);
+
+ QDEBUG("WRITE: acb%llu-%p start sector_num=%" PRId64 " nb_sectors=%d\n",
+ acb->uuid, acb, acb->sector_num, acb->nb_sectors);
+
+ /* do_aio_write() returns 0 both when I/O was started and when the request
+ * was only queued behind a conflicting copy lock. */
+ if (do_aio_write(acb) < 0) {
+ my_qemu_aio_release(acb);
+ return NULL;
+ }
+#ifdef FVD_DEBUG
+ pending_local_writes++;
+#endif
+ return &acb->common;
+}
+
+/*
+ * Tear down a write request: unlink acb from the write-lock and copy-lock
+ * lists if it is still on them (restarting any writes that were waiting on
+ * its copy lock), free its copy-on-write and journal buffers, and release
+ * the acb itself.
+ */
+static void free_write_resource(FvdAIOCB * acb)
+{
+    /* A non-NULL le_prev means the acb is still linked on that list. */
+    if (acb->write.next_write_lock.le_prev != NULL) {
+        QLIST_REMOVE(acb, write.next_write_lock);
+    }
+
+    if (acb->copy_lock.next.le_prev != NULL) {
+        QLIST_REMOVE(acb, copy_lock.next);
+        restart_dependent_writes(acb);
+    }
+
+    if (acb->write.cow_buf != NULL) {
+        my_qemu_vfree(acb->write.cow_buf);
+    }
+
+    if (acb->jcb.iov.iov_base) {
+        my_qemu_vfree(acb->jcb.iov.iov_base);
+    }
+
+    my_qemu_aio_release(acb);
+
+#ifdef FVD_DEBUG
+    pending_local_writes--;
+#endif
+}
+
+/*
+ * Complete a write request: invoke the completion callback stored in
+ * acb->common with 'ret', then free everything the request holds.
+ * acb must not be used after this call.
+ */
+static inline void finish_write(FvdAIOCB * acb, int ret)
+{
+    QDEBUG("WRITE: acb%llu-%p completely_finished ret=%d\n", acb->uuid, acb,
+           ret);
+
+    (*acb->common.cb)(acb->common.opaque, ret);
+
+    free_write_resource(acb);
+}
+
+/*
+ * Last step shared by both branches of write_data_cb(): call
+ * write_metadata_to_journal() with update_bitmap set when bitmap_dirty is
+ * true, call it with update_bitmap cleared when only acb->write.update_table
+ * is set, and otherwise complete the request with 'ret'.
+ */
+static inline void journal_or_finish_write(FvdAIOCB * acb, bool bitmap_dirty,
+                                           int ret)
+{
+    if (bitmap_dirty) {
+        write_metadata_to_journal(acb, true);
+        return;
+    }
+
+    if (acb->write.update_table) {
+        write_metadata_to_journal(acb, false);
+        return;
+    }
+
+    finish_write(acb, ret);     /* No need to update metadata. */
+}
+
+/*
+ * Completion callback for the data write started on behalf of acb
+ * ('opaque'). Does nothing while a cancel is in progress. A non-zero 'ret'
+ * finishes the request with that error; otherwise the bitmaps are examined
+ * to decide whether metadata must be journaled before the request finishes.
+ */
+static void write_data_cb(void *opaque, int ret)
+{
+    FvdAIOCB *acb = opaque;
+    BDRVFvdState *s = acb->common.bs->opaque;
+
+    if (acb->cancel_in_progress) {
+        return;
+    }
+
+    acb->write.ret = ret;
+    acb->write.hd_acb = NULL;
+
+    if (ret != 0) {
+        QDEBUG("WRITE: acb%llu-%p write_data_cb error ret=%d\n",
+               acb->uuid, acb, ret);
+        finish_write(acb, ret);
+        return;
+    }
+
+    QDEBUG("WRITE: acb%llu-%p write_data_cb\n", acb->uuid, acb);
+
+    if (s->fresh_bitmap != s->stale_bitmap) {
+        /* The two bitmaps are distinct. Update fresh_bitmap right away;
+         * stale_bitmap follows once the on-disk metadata are updated. */
+        const bool stale_is_behind =
+            update_fresh_bitmap_and_check_stale_bitmap(acb);
+
+        /* fresh_bitmap is up to date, so the lock on the data can go. */
+        QLIST_REMOVE(acb, write.next_write_lock);
+        acb->write.next_write_lock.le_prev = NULL;
+        if (acb->copy_lock.next.le_prev) {
+            QLIST_REMOVE(acb, copy_lock.next);
+            restart_dependent_writes(acb);
+        }
+
+        journal_or_finish_write(acb, stale_is_behind, ret);
+        return;
+    }
+
+    /* Both names refer to one bitmap: neither copy_on_read nor prefetching
+     * is enabled, and fresh_bitmap cannot be updated until the on-disk
+     * metadata is updated. */
+    journal_or_finish_write(acb, stale_bitmap_need_update(acb), ret);
+}
+
+/*
+ * Completion callback for the backing-image read issued by do_aio_write().
+ * Does nothing while a cancel is in progress. A read error finishes the
+ * request with that error; otherwise the merged buffer described by
+ * acb->write.cow_qiov is handed to store_data(), and the request finishes
+ * with -EIO if that write cannot be started.
+ */
+static void read_backing_for_copy_on_write_cb(void *opaque, int ret)
+{
+    FvdAIOCB *acb = (FvdAIOCB *) opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    if (acb->cancel_in_progress) {
+        return;
+    }
+
+    if (ret != 0) {
+        QDEBUG("WRITE: acb%llu-%p read_backing with error "
+               "ret=%d\n", acb->uuid, acb, ret);
+        finish_write(acb, ret);
+        return;
+    }
+
+    QDEBUG("WRITE: acb%llu-%p "
+           "finish_read_from_backing_and_start_write_data\n",
+           acb->uuid, acb);
+
+    acb->write.hd_acb = store_data(false, acb, bs,
+                                   acb->write.cow_start_sector,
+                                   acb->write.cow_qiov,
+                                   acb->write.cow_qiov->size / 512,
+                                   write_data_cb, acb);
+    if (acb->write.hd_acb == NULL) {
+        finish_write(acb, -EIO);
+    }
+}
+
+/*
+ * Start (or queue) the write described by acb.
+ *
+ * If the request's sector range overlaps an entry on s->copy_locks, acb is
+ * put on that entry's dependent_writes list and 0 is returned without
+ * starting any I/O. Otherwise the fresh bitmap decides whether the first
+ * and/or last partially written block must first be read from bs->backing_hd
+ * (cases 1-3) or whether the data can be passed to store_data() directly
+ * (case 4).
+ *
+ * For cases 1-3 a single cow_buf allocation holds, in this order: the data
+ * read from the base image, the read QEMUIOVector with its one iovec, and
+ * the write QEMUIOVector with its iovec array.
+ *
+ * Returns 0 when the request was queued or its I/O was started (acb is then
+ * also on s->write_locks), and -EIO when the I/O could not be started.
+ */
+static int do_aio_write(FvdAIOCB * acb)
+{
+ BlockDriverState *bs = acb->common.bs;
+ BDRVFvdState *s = bs->opaque;
+
+ /* Calculate the data region need be locked. */
+ const int64_t sector_end = acb->sector_num + acb->nb_sectors;
+ const int64_t block_begin = ROUND_DOWN(acb->sector_num, s->block_size);
+ int64_t block_end = ROUND_UP(sector_end, s->block_size);
+
+ /* Check for conflicting copy-on-reads. */
+ FvdAIOCB *old;
+ QLIST_FOREACH(old, &s->copy_locks, copy_lock.next) {
+ if (old->copy_lock.end > acb->sector_num &&
+ sector_end > old->copy_lock.begin) {
+ QLIST_INSERT_HEAD(&old->copy_lock.dependent_writes, acb,
+ write.next_dependent_write);
+ QDEBUG("WRITE: acb%llu-%p put_on_hold_due_to_data_conflict "
+ "with %s acb%llu-%p\n", acb->uuid, acb,
+ old->type == OP_WRITE ? "write" : "copy_on_read",
+ old->uuid, old);
+ return 0;
+ }
+ }
+
+ /* No conflict. check if this write updates partial blocks and need to
+ * read those blocks from the base image and merge with this write. */
+ int read_first_block, read_last_block;
+ if (acb->sector_num % s->block_size == 0) {
+ read_first_block = false;
+ } else if (fresh_bitmap_show_sector_in_base_img(acb->sector_num, s)) {
+ read_first_block = true;
+ } else {
+ read_first_block = false;
+ }
+
+ /* sector_end is exclusive, but when it is not block-aligned it lies in the
+ * same block as the last written sector, so it can be used for the lookup. */
+ if (sector_end % s->block_size == 0) {
+ read_last_block = false;
+ } else if (fresh_bitmap_show_sector_in_base_img(sector_end, s)) {
+ read_last_block = true;
+ } else {
+ read_last_block = false;
+ }
+
+ if (read_first_block) {
+ if (read_last_block) {
+ /* Case 1: Read all the blocks involved from the base image. */
+ const QEMUIOVector *old_qiov = acb->write.qiov;
+ if (block_end > s->base_img_sectors) {
+ block_end = s->base_img_sectors;
+ }
+
+ /* Data, two QEMUIOVector headers, 1 read iovec and niov + 2 write
+ * iovecs. */
+ int buf_size = (block_end - block_begin) * 512
+ + 2 * sizeof(QEMUIOVector)
+ + sizeof(struct iovec) * (old_qiov->niov + 3);
+ buf_size = ROUND_UP(buf_size, 512);
+ acb->write.cow_buf = my_qemu_blockalign(bs->backing_hd, buf_size);
+
+ /* For reading from the base image. */
+ QEMUIOVector *read_qiov = (QEMUIOVector *) (acb->write.cow_buf +
+ (block_end - block_begin) * 512);
+ read_qiov->iov = (struct iovec *)(read_qiov + 1);
+ read_qiov->nalloc = -1;
+ read_qiov->niov = 1;
+ read_qiov->iov[0].iov_base = acb->write.cow_buf;
+ read_qiov->iov[0].iov_len = read_qiov->size =
+ (block_end - block_begin) * 512;
+
+ /* For writing to the FVD data file. */
+ QEMUIOVector *write_qiov = (QEMUIOVector *) (read_qiov->iov + 1);
+ write_qiov->iov = (struct iovec *)(write_qiov + 1);
+ write_qiov->nalloc = -1;
+ write_qiov->niov = old_qiov->niov + 2;
+ write_qiov->size = read_qiov->size;
+
+ /* The first entry is for data read from the base image. */
+ write_qiov->iov[0].iov_base = acb->write.cow_buf;
+ write_qiov->iov[0].iov_len = (acb->sector_num - block_begin) * 512;
+ memcpy(&write_qiov->iov[1], old_qiov->iov,
+ sizeof(struct iovec) * old_qiov->niov);
+
+ /* The last entry is for data read from the base image. */
+ const int last = old_qiov->niov + 1;
+ write_qiov->iov[last].iov_base = acb->write.cow_buf
+ + (sector_end - block_begin) * 512;
+ write_qiov->iov[last].iov_len = (block_end - sector_end) * 512;
+ acb->write.cow_qiov = write_qiov;
+ acb->write.cow_start_sector = block_begin;
+
+ acb->write.hd_acb = bdrv_aio_readv(bs->backing_hd, block_begin,
+ read_qiov, block_end - block_begin,
+ read_backing_for_copy_on_write_cb, acb);
+ if (!acb->write.hd_acb) {
+ goto fail;
+ }
+
+ acb->copy_lock.begin = block_begin;
+ acb->copy_lock.end = block_end;
+ QLIST_INSERT_HEAD(&s->copy_locks, acb, copy_lock.next);
+ QDEBUG("WRITE: acb%llu-%p "
+ "read_first_last_partial_blocks_from_backing sector_num=%"
+ PRId64 " nb_sectors=%d\n", acb->uuid, acb, block_begin,
+ (int)(block_end - block_begin));
+ } else {
+ /* Case 2: Read the first block from the base image. */
+ int nb = acb->sector_num - block_begin;
+ const QEMUIOVector *old_qiov = acb->write.qiov;
+
+ /* Space for data and metadata. */
+ int buf_size = nb * 512 + 2 * sizeof(QEMUIOVector)
+ + sizeof(struct iovec) * (old_qiov->niov + 2);
+ buf_size = ROUND_UP(buf_size, 512);
+ acb->write.cow_buf = my_qemu_blockalign(bs->backing_hd, buf_size);
+
+ /* For reading from the base image. */
+ QEMUIOVector *read_qiov =
+ (QEMUIOVector *) (acb->write.cow_buf + nb * 512);
+ read_qiov->iov = (struct iovec *)(read_qiov + 1);
+ read_qiov->nalloc = -1;
+ read_qiov->niov = 1;
+ read_qiov->iov[0].iov_base = acb->write.cow_buf;
+ read_qiov->iov[0].iov_len = read_qiov->size = nb * 512;
+
+ /* For writing to the FVD data file. */
+ QEMUIOVector *write_qiov = (QEMUIOVector *) (read_qiov->iov + 1);
+ write_qiov->iov = (struct iovec *)(write_qiov + 1);
+ write_qiov->nalloc = -1;
+ write_qiov->niov = old_qiov->niov + 1;
+ write_qiov->size = old_qiov->size + read_qiov->size;
+
+ /* The first entry is added for data read from the base image. */
+ write_qiov->iov[0].iov_base = acb->write.cow_buf;
+ write_qiov->iov[0].iov_len = read_qiov->size;
+ memcpy(&write_qiov->iov[1], old_qiov->iov,
+ sizeof(struct iovec) * old_qiov->niov);
+ acb->write.cow_qiov = write_qiov;
+ acb->write.cow_start_sector = block_begin;
+
+ acb->write.hd_acb = bdrv_aio_readv(bs->backing_hd,
+ block_begin, read_qiov, nb,
+ read_backing_for_copy_on_write_cb, acb);
+ if (!acb->write.hd_acb) {
+ goto fail;
+ }
+
+ acb->copy_lock.begin = block_begin;
+ acb->copy_lock.end = block_begin + s->block_size;
+ QLIST_INSERT_HEAD(&s->copy_locks, acb, copy_lock.next);
+ QDEBUG("WRITE: acb%llu-%p read_first_partial_block_from_backing "
+ "sector_num=%" PRId64 " nb_sectors=%d\n",
+ acb->uuid, acb, block_begin, nb);
+ }
+ } else {
+ if (read_last_block) {
+ /* Case 3: Read the last block from the base image. */
+ int nb;
+ /* Read only up to the end of the base image when the last block
+ * extends past it. */
+ if (block_end < s->base_img_sectors) {
+ nb = block_end - sector_end;
+ } else {
+ nb = s->base_img_sectors - sector_end;
+ }
+ const QEMUIOVector *old_qiov = acb->write.qiov;
+
+ /* Space for data and metadata. */
+ int buf_size = nb * 512 + 2 * sizeof(QEMUIOVector)
+ + sizeof(struct iovec) * (old_qiov->niov + 2);
+ buf_size = ROUND_UP(buf_size, 512);
+ acb->write.cow_buf = my_qemu_blockalign(bs->backing_hd, buf_size);
+
+ /* For reading from the base image. */
+ QEMUIOVector *read_qiov = (QEMUIOVector *) (acb->write.cow_buf
+ + nb * 512);
+ read_qiov->iov = (struct iovec *)(read_qiov + 1);
+ read_qiov->nalloc = -1;
+ read_qiov->niov = 1;
+ read_qiov->iov[0].iov_base = acb->write.cow_buf;
+ read_qiov->iov[0].iov_len = read_qiov->size = nb * 512;
+
+ /* For writing to the FVD data file. */
+ QEMUIOVector *write_qiov = (QEMUIOVector *) (read_qiov->iov + 1);
+ write_qiov->iov = (struct iovec *)(write_qiov + 1);
+ write_qiov->nalloc = -1;
+ write_qiov->niov = old_qiov->niov + 1;
+ write_qiov->size = old_qiov->size + read_qiov->size;
+ memcpy(write_qiov->iov, old_qiov->iov,
+ sizeof(struct iovec) * old_qiov->niov);
+
+ /* The last appended entry is for data read from the base image. */
+ write_qiov->iov[old_qiov->niov].iov_base = acb->write.cow_buf;
+ write_qiov->iov[old_qiov->niov].iov_len = read_qiov->size;
+ acb->write.cow_qiov = write_qiov;
+ acb->write.cow_start_sector = acb->sector_num;
+
+ acb->write.hd_acb = bdrv_aio_readv(bs->backing_hd,
+ sector_end, read_qiov, nb,
+ read_backing_for_copy_on_write_cb, acb);
+ if (!acb->write.hd_acb) {
+ goto fail;
+ }
+
+ acb->copy_lock.end = block_end;
+ acb->copy_lock.begin = block_end - s->block_size;
+ QLIST_INSERT_HEAD(&s->copy_locks, acb, copy_lock.next);
+ QDEBUG("WRITE: acb%llu-%p read_last_partial_block_from_backing "
+ "sector_num=%" PRId64 " nb_sectors=%d\n",
+ acb->uuid, acb, sector_end, nb);
+ } else {
+ /* Case 4: Can write directly and no need to merge with data from
+ * the base image. */
+ QDEBUG("WRITE: acb%llu-%p "
+ "write_fvd_without_read_partial_block_from_backing\n",
+ acb->uuid, acb);
+ acb->write.hd_acb = store_data(false, acb, bs, acb->sector_num,
+ acb->write.qiov, acb->nb_sectors,
+ write_data_cb, acb);
+ if (!acb->write.hd_acb) {
+ goto fail;
+ }
+ }
+ }
+
+ /* Reached by all four cases once their I/O has been started. */
+ QLIST_INSERT_HEAD(&s->write_locks, acb, write.next_write_lock);
+ return 0;
+
+fail:
+ /* NOTE(review): cow_buf is freed but not reset to NULL; the callers in
+ * this file release acb right after a failure -- confirm no other path
+ * frees it again. */
+ if (acb->write.cow_buf) {
+ my_qemu_vfree(acb->write.cow_buf);
+ }
+ return -EIO;
+}
+
+/*
+ * Restart every write that was put on hold on acb's copy lock.
+ *
+ * acb is first marked as no longer being on the copy_locks list. Each
+ * request on acb->copy_lock.dependent_writes is then detached and handed to
+ * do_aio_write() again. A request whose restart fails is completed with the
+ * error code do_aio_write() returned and is released here.
+ */
+static void restart_dependent_writes(FvdAIOCB * acb)
+{
+    acb->copy_lock.next.le_prev = NULL;
+    FvdAIOCB *req = acb->copy_lock.dependent_writes.lh_first;
+
+    while (req) {
+        /* Keep a copy of 'next' as it may be changed in do_aio_write(). */
+        FvdAIOCB *next = req->write.next_dependent_write.le_next;
+
+        /* Indicate that this write is no longer on any dependent list. This
+         * helps fvd_aio_cancel_read() work properly. */
+        req->write.next_dependent_write.le_prev = NULL;
+
+        if (acb->type == OP_WRITE) {
+            QDEBUG("WRITE: acb%llu-%p finished_and_restart_conflict_write "
+                   "acb%llu-%p\n", acb->uuid, acb, req->uuid, req);
+        } else {
+            QDEBUG("READ: copy_on_read acb%llu-%p "
+                   "finished_and_restart_conflict_write acb%llu-%p\n",
+                   acb->uuid, acb, req->uuid, req);
+        }
+
+        /* Report the real error code instead of a hard-coded -1. */
+        const int ret = do_aio_write(req);
+        if (ret < 0) {
+            QDEBUG("WRITE: acb%llu-%p finished with error ret=%d\n",
+                   req->uuid, req, ret);
+            req->common.cb(req->common.opaque, ret);
+            my_qemu_aio_release(req);
+#ifdef FVD_DEBUG
+            /* The request was counted in fvd_aio_writev() when it was
+             * queued, and it never reaches free_write_resource(), so the
+             * counter has to be balanced here. */
+            pending_local_writes--;
+#endif
+        }
+
+        req = next;
+    }
}
diff --git a/block/fvd.c b/block/fvd.c
index e41f419..5b3dcac 100644
--- a/block/fvd.c
+++ b/block/fvd.c
@@ -27,11 +27,13 @@
* function optimization. */
#include "block/fvd-debug.c"
#include "block/fvd-flush.c"
+#include "block/fvd-bitmap.c"
#include "block/fvd-misc.c"
#include "block/fvd-create.c"
#include "block/fvd-open.c"
-#include "block/fvd-read.c"
#include "block/fvd-write.c"
+#include "block/fvd-read.c"
+#include "block/fvd-store.c"
#include "block/fvd-journal.c"
#include "block/fvd-prefetch.c"
#include "block/fvd-update.c"
diff --git a/block/fvd.h b/block/fvd.h
index 9847e7f..34ea2b4 100644
--- a/block/fvd.h
+++ b/block/fvd.h
@@ -432,6 +432,7 @@ typedef struct FvdAIOCB {
#endif
} FvdAIOCB;
+static AIOPool fvd_aio_pool;
static BlockDriver bdrv_fvd;
static QEMUOptionParameter fvd_create_options[];
static QEMUOptionParameter fvd_update_options[];
--
1.7.0.4
- [Qemu-devel] [PATCH 22/26] FVD: add impl of interface bdrv_update(), (continued)
- [Qemu-devel] [PATCH 22/26] FVD: add impl of interface bdrv_update(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 13/26] FVD: add impl of storing data in compact image, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 19/26] FVD: add support for aio_cancel, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 25/26] FVD: add impl of interface bdrv_probe(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 15/26] FVD: add basic journal functionality, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 18/26] FVD: add support for base image prefetching, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 03/26] FVD: add fully automated test-qcow2.sh, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 02/26] FVD: extend qemu-io to do fully automated testing, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 07/26] FVD: extend FVD header fvd.h to be more complete, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 12/26] FVD: add impl of interface bdrv_aio_readv(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 11/26] FVD: add impl of interface bdrv_aio_writev(),
Chunqiang Tang <=
- [Qemu-devel] [PATCH 06/26] FVD: skeleton of Fast Virtual Disk, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 05/26] FVD: add the 'qemu-img update' command, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 04/26] FVD: add fully automated test-vdi.sh, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 09/26] FVD: add impl of interface bdrv_create(), Chunqiang Tang, 2011/02/25