[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 1/3 v9] add-cow file format
From: |
Dong Xu Wang |
Subject: |
[Qemu-devel] [PATCH 1/3 v9] add-cow file format |
Date: |
Tue, 8 May 2012 01:34:16 +0800 |
Provide a new file format: add-cow. The usage can be found in add-cow.txt of
this patch.
CC: Kevin Wolf <address@hidden>
CC: Stefan Hajnoczi <address@hidden>
Signed-off-by: Dong Xu Wang <address@hidden>
---
Makefile.objs | 1 +
block.c | 2 +-
block.h | 1 +
block/add-cow-cache.c | 193 +++++++++++++++++++++
block/add-cow.c | 446 ++++++++++++++++++++++++++++++++++++++++++++++++
block/add-cow.h | 83 +++++++++
block_int.h | 1 +
docs/specs/add-cow.txt | 68 ++++++++
qemu-img.c | 39 +++++
9 files changed, 833 insertions(+), 1 deletion(-)
create mode 100644 block/add-cow-cache.c
create mode 100644 block/add-cow.c
create mode 100644 block/add-cow.h
create mode 100644 docs/specs/add-cow.txt
diff --git a/Makefile.objs b/Makefile.objs
index 70c5c79..10c5c52 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -52,6 +52,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o
dmg.o bochs.o vpc.o vv
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
qcow2-cache.o
block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-nested-y += qed-check.o
+block-nested-y += add-cow.o add-cow-cache.o
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
block-nested-y += stream.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
diff --git a/block.c b/block.c
index 43c794c..206860c 100644
--- a/block.c
+++ b/block.c
@@ -196,7 +196,7 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs,
}
/* check if the path starts with "<protocol>:" */
-static int path_has_protocol(const char *path)
+int path_has_protocol(const char *path)
{
#ifdef _WIN32
if (is_windows_drive(path) ||
diff --git a/block.h b/block.h
index f163e54..f74c79e 100644
--- a/block.h
+++ b/block.h
@@ -319,6 +319,7 @@ char *bdrv_snapshot_dump(char *buf, int buf_size,
QEMUSnapshotInfo *sn);
char *get_human_readable_size(char *buf, int buf_size, int64_t size);
int path_is_absolute(const char *path);
+int path_has_protocol(const char *path);
void path_combine(char *dest, int dest_size,
const char *base_path,
const char *filename);
diff --git a/block/add-cow-cache.c b/block/add-cow-cache.c
new file mode 100644
index 0000000..3ae23c1
--- /dev/null
+++ b/block/add-cow-cache.c
@@ -0,0 +1,193 @@
+/*
+ * Cache For QEMU ADD-COW Disk Format
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "block_int.h"
+#include "qemu-common.h"
+#include "add-cow.h"
+
+/* Based on qcow2-cache.c */
+AddCowCache *add_cow_cache_create(BlockDriverState *bs, int num_tables,
+ bool writethrough)
+{
+ AddCowCache *c;
+ int i;
+
+ c = g_malloc0(sizeof(*c));
+ c->size = num_tables;
+ c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
+ c->writethrough = writethrough;
+ c->entry_size = ADD_COW_CACHE_ENTRY_SIZE;
+
+ for (i = 0; i < c->size; i++) {
+ c->entries[i].table = qemu_blockalign(bs, c->entry_size);
+ c->entries[i].offset = -1;
+ }
+
+ return c;
+}
+
+int add_cow_cache_destroy(BlockDriverState *bs, AddCowCache *c)
+{
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ qemu_vfree(c->entries[i].table);
+ }
+
+ g_free(c->entries);
+ g_free(c);
+
+ return 0;
+}
+
+static int add_cow_cache_entry_flush(BlockDriverState *bs,
+ AddCowCache *c,
+ int i)
+{
+ BDRVAddCowState *s = bs->opaque;
+ int ret = 0;
+
+ if (!c->entries[i].dirty || -1 == c->entries[i].offset) {
+ return ret;
+ }
+
+ ret = bdrv_pwrite(bs->file, sizeof(AddCowHeader) + c->entries[i].offset,
+ c->entries[i].table,
+ MIN(c->entry_size, s->bitmap_size - c->entries[i].offset));
+ if (ret < 0) {
+ return ret;
+ }
+
+ c->entries[i].dirty = false;
+
+ return 0;
+}
+
+/*
+ * Write every dirty bitmap cache entry back to the add-cow file.
+ *
+ * The raw image (s->image_hd) is flushed before any bitmap entry is written,
+ * presumably so that guest data is stable before the bits that mark it as
+ * allocated reach the disk. bs->file is flushed afterwards only when every
+ * entry was written successfully.
+ *
+ * Returns 0 on success or a negative errno value. When several entries fail,
+ * -ENOSPC is kept once seen; otherwise the most recent error is reported.
+ */
+int add_cow_cache_flush(BlockDriverState *bs, AddCowCache *c)
+{
+    BDRVAddCowState *s = bs->opaque;
+    int result = 0;
+    int ret;
+    int i;
+
+    ret = bdrv_flush(s->image_hd);
+    if (ret < 0) {
+        /* "result" is still 0 here; returning it would hide the failure. */
+        return ret;
+    }
+
+    for (i = 0; i < c->size; i++) {
+        ret = add_cow_cache_entry_flush(bs, c, i);
+        if (ret < 0 && result != -ENOSPC) {
+            result = ret;
+        }
+    }
+
+    if (result == 0) {
+        ret = bdrv_flush(bs->file);
+        if (ret < 0) {
+            result = ret;
+        }
+    }
+
+    return result;
+}
+
+static int add_cow_cache_find_entry_to_replace(AddCowCache *c)
+{
+ int i;
+ int min_count = INT_MAX;
+ int min_index = -1;
+
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].cache_hits < min_count) {
+ min_index = i;
+ min_count = c->entries[i].cache_hits;
+ }
+
+ c->entries[i].cache_hits /= 2;
+ }
+
+ if (min_index == -1) {
+ abort();
+ }
+ return min_index;
+}
+
+static int add_cow_cache_do_get(BlockDriverState *bs, AddCowCache *c,
+ uint64_t offset, void **table)
+{
+ int i, ret;
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].offset == offset) {
+ goto found;
+ }
+ }
+
+ i = add_cow_cache_find_entry_to_replace(c);
+ if (i < 0) {
+ return i;
+ }
+
+ ret = add_cow_cache_entry_flush(bs, c, i);
+ if (ret < 0) {
+ return ret;
+ }
+ c->entries[i].offset = -1;
+ ret = bdrv_pread(bs->file, sizeof(AddCowHeader) + offset,
+ c->entries[i].table, c->entry_size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ c->entries[i].cache_hits = 32;
+ c->entries[i].offset = offset;
+
+found:
+ c->entries[i].cache_hits++;
+ *table = c->entries[i].table;
+
+ return 0;
+}
+
+/*
+ * Look up the bitmap cache table that covers sector_num and store it in
+ * *table. Returns whatever add_cow_cache_do_get() returns.
+ */
+int add_cow_cache_get(BlockDriverState *bs, AddCowCache *c, uint64_t
sector_num,
+ void **table)
+{
+ /*
+  * Each byte in the bitmap covers 8 clusters, i.e. 8 * SECTORS_PER_CLUSTER
+  * sectors. The mask rounds the byte offset down to the start of its cache
+  * entry (assumes entry_size is a power of two).
+  */
+ uint64_t offset = offset_in_bitmap(sector_num) & (~(c->entry_size - 1));
+ return add_cow_cache_do_get(bs, c, offset, table);
+}
+
+void add_cow_cache_entry_mark_dirty(AddCowCache *c, void *table)
+{
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].table == table) {
+ goto found;
+ }
+ }
+ abort();
+
+found:
+ c->entries[i].dirty = true;
+}
+
+bool add_cow_cache_set_writethrough(BlockDriverState *bs, AddCowCache *c,
+ bool enable)
+{
+ bool old = c->writethrough;
+
+ if (!old && enable) {
+ add_cow_cache_flush(bs, c);
+ }
+
+ c->writethrough = enable;
+ return old;
+}
diff --git a/block/add-cow.c b/block/add-cow.c
new file mode 100644
index 0000000..8bf27ab
--- /dev/null
+++ b/block/add-cow.c
@@ -0,0 +1,446 @@
+/*
+ * QEMU ADD-COW Disk Format
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ * Dong Xu Wang <address@hidden>
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+#include "add-cow.h"
+
+/*
+ * Probe callback: score how likely it is that buf holds an add-cow header.
+ *
+ * Returns 100 when the big-endian magic and version fields match
+ * ADD_COW_MAGIC and ADD_COW_VERSION, and 0 otherwise (including when buf is
+ * too short to contain both fields). filename is not consulted.
+ */
+static int add_cow_probe(const uint8_t *buf, int buf_size,
+                         const char *filename)
+{
+    const AddCowHeader *header = (const AddCowHeader *)buf;
+
+    /*
+     * Only magic and version are inspected, so require just those bytes
+     * rather than a complete AddCowHeader.
+     */
+    if (buf_size < (int)(sizeof(header->magic) + sizeof(header->version))) {
+        return 0;
+    }
+
+    if (be64_to_cpu(header->magic) == ADD_COW_MAGIC &&
+        be32_to_cpu(header->version) == ADD_COW_VERSION) {
+        return 100;
+    } else {
+        return 0;
+    }
+}
+
+static int add_cow_create(const char *filename, QEMUOptionParameter *options)
+{
+ AddCowHeader header;
+ int64_t image_sectors = 0;
+ const char *backing_filename = NULL;
+ const char *image_filename = NULL;
+ int ret;
+ BlockDriverState *bs, *image_bs = NULL;
+
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ image_sectors = options->value.n / BDRV_SECTOR_SIZE;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_filename = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_IMAGE_FILE)) {
+ image_filename = options->value.s;
+ }
+ options++;
+ }
+
+ if (!backing_filename || !image_filename) {
+ error_report("Both backing_file and image_file should be given.");
+ return -EINVAL;
+ }
+
+ ret = bdrv_file_open(&image_bs, image_filename, BDRV_O_RDWR
+ | BDRV_O_CACHE_WB);
+ if (ret < 0) {
+ return ret;
+ }
+ image_sectors = image_bs->total_sectors;
+ bdrv_delete(image_bs);
+
+ ret = bdrv_create_file(filename, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
+ if (ret < 0) {
+ return ret;
+ }
+
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be64(ADD_COW_MAGIC);
+ header.version = cpu_to_be32(ADD_COW_VERSION);
+ pstrcpy(header.backing_file, sizeof(header.backing_file),
backing_filename);
+ pstrcpy(header.image_file, sizeof(header.image_file), image_filename);
+
+ ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
+ if (ret < 0) {
+ bdrv_delete(bs);
+ return ret;
+ }
+
+ BlockDriver *drv = bdrv_find_format("add-cow");
+ assert(drv != NULL);
+ ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv);
+ if (ret < 0) {
+ bdrv_delete(bs);
+ return ret;
+ }
+
+ ret = bdrv_truncate(bs, image_sectors * BDRV_SECTOR_SIZE);
+ bdrv_delete(bs);
+ return ret;
+}
+
+static int add_cow_open(BlockDriverState *bs, int flags)
+{
+ AddCowHeader header;
+ char image_filename[ADD_COW_FILE_LEN];
+ BlockDriver *image_drv = NULL;
+ int ret;
+ bool writethrough;
+ int sector_per_byte;
+ BDRVAddCowState *s = bs->opaque;
+
+ ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ if (ret != sizeof(header)) {
+ goto fail;
+ }
+
+ if (be64_to_cpu(header.magic) != ADD_COW_MAGIC) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (be32_to_cpu(header.version) != ADD_COW_VERSION) {
+ char version[64];
+ snprintf(version, sizeof(version), "ADD-COW version %d",
+ be32_to_cpu(header.version));
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "add-cow", version);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ QEMU_BUILD_BUG_ON(sizeof(bs->backing_file) != sizeof(header.backing_file));
+ pstrcpy(bs->backing_file, sizeof(bs->backing_file), header.backing_file);
+
+ if (header.image_file[0] == '\0') {
+ ret = -ENOENT;
+ goto fail;
+ }
+ header.image_file[ADD_COW_FILE_LEN - 1] = '\0';
+ s->image_hd = bdrv_new("");
+ if (path_has_protocol(header.image_file)) {
+ pstrcpy(image_filename, sizeof(image_filename), header.image_file);
+ } else {
+ path_combine(image_filename, sizeof(image_filename),
+ bs->filename, header.image_file);
+ }
+
+ image_drv = bdrv_find_format("raw");
+ ret = bdrv_open(s->image_hd, image_filename, flags, image_drv);
+ if (ret < 0) {
+ bdrv_delete(s->image_hd);
+ goto fail;
+ }
+ bs->total_sectors = s->image_hd->total_sectors;
+ s->cluster_size = ADD_COW_CLUSTER_SIZE;
+ sector_per_byte = SECTORS_PER_CLUSTER * 8;
+ s->bitmap_size =
+ (bs->total_sectors + sector_per_byte - 1) / sector_per_byte;
+ writethrough = ((flags & BDRV_O_CACHE_WB) == 0);
+ s->bitmap_cache =
+ add_cow_cache_create(bs, ADD_COW_CACHE_SIZE, writethrough);
+
+ qemu_co_mutex_init(&s->lock);
+ return 0;
+fail:
+ if (s->bitmap_cache) {
+ add_cow_cache_destroy(bs, s->bitmap_cache);
+ }
+ return ret;
+}
+
+static void add_cow_close(BlockDriverState *bs)
+{
+ BDRVAddCowState *s = bs->opaque;
+ add_cow_cache_destroy(bs, s->bitmap_cache);
+ bdrv_delete(s->image_hd);
+}
+
+static bool is_allocated(BlockDriverState *bs, int64_t sector_num)
+{
+ BDRVAddCowState *s = bs->opaque;
+ int64_t cluster_num = sector_num / SECTORS_PER_CLUSTER;
+ uint8_t *table = NULL;
+ int ret = add_cow_cache_get(bs, s->bitmap_cache,
+ sector_num, (void **)&table);
+
+ if (ret < 0) {
+ return 0;
+ }
+ return table[cluster_num / 8 % ADD_COW_CACHE_ENTRY_SIZE]
+ & (1 << (cluster_num % 8));
+}
+
+static coroutine_fn int add_cow_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *num_same)
+{
+ int changed;
+ int64_t cluster_num;
+
+ if (nb_sectors == 0) {
+ *num_same = 0;
+ return 0;
+ }
+
+ cluster_num = sector_num / SECTORS_PER_CLUSTER;
+ changed = is_allocated(bs, sector_num);
+ *num_same =
+ MIN(nb_sectors, (cluster_num + 1) * SECTORS_PER_CLUSTER - sector_num);
+
+ for (cluster_num = sector_num / SECTORS_PER_CLUSTER + 1;
+ cluster_num <= (sector_num + nb_sectors - 1) / SECTORS_PER_CLUSTER;
+ cluster_num++) {
+ if (is_allocated(bs, cluster_num * SECTORS_PER_CLUSTER) != changed) {
+ break;
+ }
+ *num_same = MIN(nb_sectors,
+ (cluster_num + 1) * SECTORS_PER_CLUSTER - sector_num);
+ }
+
+ return changed;
+}
+
+/*
+ * Read remaining_sectors sectors starting at sector_num into qiov.
+ *
+ * The request is split into runs that share the same allocation status, as
+ * reported by add_cow_is_allocated():
+ *   - allocated runs are read from the raw image (s->image_hd);
+ *   - unallocated runs are read from bs->backing_hd when one is open,
+ *     otherwise the corresponding part of qiov is filled with zeroes.
+ *
+ * s->lock is held while the bitmap is consulted and dropped around the
+ * actual reads. Returns 0 on success or the negative value returned by the
+ * failing bdrv_co_readv() call.
+ */
+static coroutine_fn int add_cow_co_readv(BlockDriverState *bs,
+        int64_t sector_num, int remaining_sectors, QEMUIOVector *qiov)
+{
+    BDRVAddCowState *s = bs->opaque;
+    int cur_nr_sectors;
+    uint64_t bytes_done = 0;
+    QEMUIOVector hd_qiov;
+    int n, ret = 0;
+
+    qemu_iovec_init(&hd_qiov, qiov->niov);
+    qemu_co_mutex_lock(&s->lock);
+    while (remaining_sectors != 0) {
+        BlockDriverState *src;
+        int allocated = add_cow_is_allocated(bs, sector_num,
+                                             remaining_sectors, &n);
+
+        cur_nr_sectors = n;
+
+        /*
+         * Point hd_qiov at the slice of the caller's buffer for this run
+         * before any branch uses it, so that the zero-fill path below writes
+         * into the right place as well.
+         */
+        qemu_iovec_reset(&hd_qiov);
+        qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
+                        cur_nr_sectors * BDRV_SECTOR_SIZE);
+
+        src = allocated ? s->image_hd : bs->backing_hd;
+        if (src) {
+            qemu_co_mutex_unlock(&s->lock);
+            ret = bdrv_co_readv(src, sector_num, n, &hd_qiov);
+            qemu_co_mutex_lock(&s->lock);
+            if (ret < 0) {
+                goto fail;
+            }
+        } else {
+            /* Unallocated and no backing file: the data reads as zeroes. */
+            qemu_iovec_memset(&hd_qiov, 0,
+                              BDRV_SECTOR_SIZE * cur_nr_sectors);
+        }
+
+        remaining_sectors -= cur_nr_sectors;
+        sector_num += cur_nr_sectors;
+        bytes_done += cur_nr_sectors * BDRV_SECTOR_SIZE;
+    }
+fail:
+    qemu_co_mutex_unlock(&s->lock);
+    qemu_iovec_destroy(&hd_qiov);
+    return ret;
+}
+
+/*
+ * Copy sectors [n_start, n_end) from the backing file into the raw image.
+ *
+ * Used to fill the part of a cluster that a guest write does not cover. When
+ * no backing file is open the range is written as zeroes, matching what the
+ * read path returns for unallocated data without a backing file.
+ *
+ * Sector numbers are 64-bit so that large images are not truncated.
+ * Returns 0 on success (or when the range is empty), otherwise the negative
+ * value returned by the failing read or write.
+ */
+static int coroutine_fn copy_sectors(BlockDriverState *bs,
+                                     int64_t n_start, int64_t n_end)
+{
+    BDRVAddCowState *s = bs->opaque;
+    QEMUIOVector qiov;
+    struct iovec iov;
+    int n, ret;
+
+    if (n_end <= n_start) {
+        return 0;
+    }
+    n = n_end - n_start;
+
+    iov.iov_len = n * BDRV_SECTOR_SIZE;
+    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    if (bs->backing_hd) {
+        ret = bdrv_co_readv(bs->backing_hd, n_start, n, &qiov);
+        if (ret < 0) {
+            goto out;
+        }
+    } else {
+        memset(iov.iov_base, 0, iov.iov_len);
+    }
+
+    ret = bdrv_co_writev(s->image_hd, n_start, n, &qiov);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = 0;
+out:
+    qemu_vfree(iov.iov_base);
+    return ret;
+}
+
+/*
+ * Write remaining_sectors sectors starting at sector_num from qiov.
+ *
+ * Steps:
+ *   1. write the guest data to the raw image (s->image_hd);
+ *   2. if the request starts or ends in the middle of a cluster that is not
+ *      yet allocated, fill the rest of that cluster via copy_sectors();
+ *   3. set the bitmap bit of every cluster touched by the request and flush
+ *      the bitmap cache if any bit changed.
+ *
+ * s->lock is held except around the copy_sectors() calls. Every exit goes
+ * through the "fail" label so the lock and hd_qiov are always released.
+ * Returns 0 on success or a negative errno value; the first error wins.
+ */
+static coroutine_fn int add_cow_co_writev(BlockDriverState *bs,
+        int64_t sector_num, int remaining_sectors, QEMUIOVector *qiov)
+{
+    BDRVAddCowState *s = bs->opaque;
+    int64_t end_sector = sector_num + remaining_sectors;
+    int64_t i;
+    int ret = 0;
+    QEMUIOVector hd_qiov;
+    uint8_t *table;
+    bool changed = false;
+
+    qemu_co_mutex_lock(&s->lock);
+    qemu_iovec_init(&hd_qiov, qiov->niov);
+    ret = bdrv_co_writev(s->image_hd, sector_num, remaining_sectors, qiov);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* copy content of unmodified sectors in front of the request */
+    if (!is_cluster_head(sector_num) && !is_allocated(bs, sector_num)) {
+        qemu_co_mutex_unlock(&s->lock);
+        ret = copy_sectors(bs, sector_num & ~(SECTORS_PER_CLUSTER - 1),
+                           sector_num);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    /* copy content of unmodified sectors behind the request */
+    if (!is_cluster_tail(end_sector - 1)
+        && !is_allocated(bs, end_sector - 1)) {
+        qemu_co_mutex_unlock(&s->lock);
+        ret = copy_sectors(bs, end_sector,
+                           (end_sector | (SECTORS_PER_CLUSTER - 1)) + 1);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    for (i = sector_num / SECTORS_PER_CLUSTER;
+         i <= (end_sector - 1) / SECTORS_PER_CLUSTER;
+         i++) {
+        /*
+         * The returned table is one cache entry, not the whole bitmap, so
+         * the byte index must be reduced modulo the entry size (same
+         * indexing as is_allocated()).
+         */
+        int64_t byte_idx = (i / 8) % ADD_COW_CACHE_ENTRY_SIZE;
+        uint8_t mask = 1 << (i % 8);
+
+        ret = add_cow_cache_get(bs, s->bitmap_cache,
+                                i * SECTORS_PER_CLUSTER, (void **)&table);
+        if (ret < 0) {
+            /* Do not return directly: the lock is held here. */
+            goto fail;
+        }
+        if ((table[byte_idx] & mask) == 0) {
+            table[byte_idx] |= mask;
+            changed = true;
+            add_cow_cache_entry_mark_dirty(s->bitmap_cache, table);
+        }
+    }
+    ret = 0;
+fail:
+    if (changed) {
+        int flush_ret = add_cow_cache_flush(bs, s->bitmap_cache);
+        /* Keep an earlier error instead of overwriting it. */
+        if (ret == 0) {
+            ret = flush_ret;
+        }
+    }
+    qemu_co_mutex_unlock(&s->lock);
+    qemu_iovec_destroy(&hd_qiov);
+    return ret;
+}
+
+static int bdrv_add_cow_truncate(BlockDriverState *bs, int64_t size)
+{
+ BDRVAddCowState *s = bs->opaque;
+ int sector_per_byte = SECTORS_PER_CLUSTER * 8;
+ int ret;
+ int64_t old_image_sector = s->image_hd->total_sectors;
+ int64_t bitmap_size =
+ (size / BDRV_SECTOR_SIZE + sector_per_byte - 1) / sector_per_byte;
+
+ ret = bdrv_truncate(bs->file,
+ sizeof(AddCowHeader) + bitmap_size);
+ if (ret < 0) {
+ bdrv_truncate(s->image_hd, old_image_sector * BDRV_SECTOR_SIZE);
+ return ret;
+ }
+ return 0;
+}
+
+static coroutine_fn int add_cow_co_flush(BlockDriverState *bs)
+{
+ BDRVAddCowState *s = bs->opaque;
+ int ret;
+
+ qemu_co_mutex_lock(&s->lock);
+ ret = add_cow_cache_flush(bs, s->bitmap_cache);
+ qemu_co_mutex_unlock(&s->lock);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return bdrv_co_flush(bs->file);
+}
+
+static QEMUOptionParameter add_cow_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_IMAGE_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a image file"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = OPT_STRING,
+ .help = "Image format of the base image"
+ },
+ { NULL }
+};
+
+static BlockDriver bdrv_add_cow = {
+ .format_name = "add-cow",
+ .instance_size = sizeof(BDRVAddCowState),
+ .bdrv_probe = add_cow_probe,
+ .bdrv_open = add_cow_open,
+ .bdrv_close = add_cow_close,
+ .bdrv_create = add_cow_create,
+ .bdrv_co_readv = add_cow_co_readv,
+ .bdrv_co_writev = add_cow_co_writev,
+ .bdrv_truncate = bdrv_add_cow_truncate,
+ .bdrv_co_is_allocated = add_cow_is_allocated,
+
+ .create_options = add_cow_create_options,
+ .bdrv_co_flush_to_disk = add_cow_co_flush,
+};
+
+static void bdrv_add_cow_init(void)
+{
+ bdrv_register(&bdrv_add_cow);
+}
+
+block_init(bdrv_add_cow_init);
diff --git a/block/add-cow.h b/block/add-cow.h
new file mode 100644
index 0000000..46567bb
--- /dev/null
+++ b/block/add-cow.h
@@ -0,0 +1,83 @@
+/*
+ * QEMU ADD-COW Disk Format
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ * Dong Xu Wang <address@hidden>
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_ADD_COW_H
+#define BLOCK_ADD_COW_H
+
+#define ADD_COW_MAGIC (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | \
+ ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \
+ ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \
+ ((uint64_t)'W' << 8) | 0xFF)
+#define ADD_COW_VERSION 1
+#define ADD_COW_FILE_LEN 1024
+#define ADD_COW_CACHE_SIZE 16
+#define ADD_COW_CACHE_ENTRY_SIZE 65536
+#define ADD_COW_CLUSTER_SIZE 65536
+#define SECTORS_PER_CLUSTER (ADD_COW_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
+
+typedef struct AddCowHeader {
+ uint64_t magic;
+ uint32_t version;
+ char backing_file[ADD_COW_FILE_LEN];
+ char image_file[ADD_COW_FILE_LEN];
+ char reserved[500];
+} QEMU_PACKED AddCowHeader;
+
+typedef struct AddCowCachedTable {
+ void *table;
+ int64_t offset;
+ bool dirty;
+ int cache_hits;
+} AddCowCachedTable;
+
+typedef struct AddCowCache {
+ AddCowCachedTable *entries;
+ int entry_size;
+ int size;
+ bool writethrough;
+} AddCowCache;
+
+typedef struct BDRVAddCowState {
+ BlockDriverState *image_hd;
+ CoMutex lock;
+ int cluster_size;
+ AddCowCache *bitmap_cache;
+ uint64_t bitmap_size;
+} BDRVAddCowState;
+
+/* convert sector_num to offset in bitmap */
+static inline int64_t offset_in_bitmap(int64_t sector_num)
+{
+ int64_t cluster_num = sector_num / SECTORS_PER_CLUSTER;
+ return cluster_num / 8;
+}
+
+static inline bool is_cluster_head(int64_t sector_num)
+{
+ return sector_num % SECTORS_PER_CLUSTER == 0;
+}
+
+static inline bool is_cluster_tail(int64_t sector_num)
+{
+ return (sector_num + 1) % SECTORS_PER_CLUSTER == 0;
+}
+
+AddCowCache *add_cow_cache_create(BlockDriverState *bs, int num_tables,
+ bool writethrough);
+int add_cow_cache_destroy(BlockDriverState *bs, AddCowCache *c);
+void add_cow_cache_entry_mark_dirty(AddCowCache *c, void *table);
+int add_cow_cache_get(BlockDriverState *bs, AddCowCache *c, uint64_t offset,
+ void **table);
+int add_cow_cache_flush(BlockDriverState *bs, AddCowCache *c);
+bool add_cow_cache_set_writethrough(BlockDriverState *bs, AddCowCache *c,
+ bool enable);
+#endif
diff --git a/block_int.h b/block_int.h
index 086832a..83e75ea 100644
--- a/block_int.h
+++ b/block_int.h
@@ -51,6 +51,7 @@
#define BLOCK_OPT_PREALLOC "preallocation"
#define BLOCK_OPT_SUBFMT "subformat"
#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_IMAGE_FILE "image_file"
typedef struct BdrvTrackedRequest BdrvTrackedRequest;
diff --git a/docs/specs/add-cow.txt b/docs/specs/add-cow.txt
new file mode 100644
index 0000000..28085d4
--- /dev/null
+++ b/docs/specs/add-cow.txt
@@ -0,0 +1,68 @@
+== General ==
+
+Raw file format does not support backing_file and copy on write feature.
+The add-cow image format makes it possible to use backing files with raw
+image by keeping a separate .add-cow metadata file. Once all sectors
+have been written to in the raw image it is safe to discard the .add-cow
+and backing files and instead use the raw image directly.
+
+When using add-cow, the procedure may look like this:
+(ubuntu.img is a disk image on which an OS has already been installed.)
+ 1) Create a raw image with the same size as ubuntu.img
+ qemu-img create -f raw test.raw 8G
+ 2) Create an add-cow image which will store the dirty bitmap
+ qemu-img create -f add-cow test.add-cow -o
backing_file=ubuntu.img,image_file=test.raw
+ 3) Run qemu with add-cow image
+ qemu -drive if=virtio,file=test.add-cow
+
+=Specification=
+
+The file format looks like this:
+
+ +---------------+--------------------------+
+ | Header | COW bitmap |
+ +---------------+--------------------------+
+
+All numbers in add-cow are stored in Big Endian byte order.
+
+== Header ==
+
+The Header is included in the first bytes:
+
+ Byte 0 - 7: magic
+ add-cow magic string ("ADD_COW\xff")
+
+ 8 - 11: version
+ Version number (only valid value is 1 now)
+
+ 12 - 1035: backing_file
+ backing_file file name related to add-cow file. All
+ unused bytes are padded with zeros. Must not be longer
+ than 1023 bytes.
+
+ 1036 - 2059: image_file
+ image_file is a raw file. All unused bytes are padded
+ with zeros. Must not be longer than 1023 bytes.
+
+ 2060 - 2559: The Reserved field is used to make sure the Data field
+ starts at a multiple of 512; it is not used currently.
+ All bytes are filled with 0.
+
+== COW bitmap ==
+
+The "COW bitmap" field starts at the 2560th byte, stores a bitmap related to
+backing_file and image_file. The bitmap will track whether the sector in
+backing_file is dirty or not.
+
+Each bit in the bitmap indicates one cluster's status. One cluster includes 128
+sectors, so each bit covers 512 * 128 = 64k bytes. The size of the bitmap is
+therefore calculated from the virtual size of image_file. In each byte, bits 0
+to 7 track the 1st to 8th cluster in sequence; the bit order in one byte looks
+like:
+ +----+----+----+----+----+----+----+----+
+ | b7 | b6 | b5 | b4 | b3 | b2 | b1 | b0 |
+ +----+----+----+----+----+----+----+----+
+
+If the bit is 0, the cluster has not been allocated in image_file, and data
+should be loaded from backing_file while reading; if the bit is 1, the
+cluster is dirty, and data should be loaded from image_file while reading.
+Writing to a sector causes the bit of its cluster to be set to 1.
diff --git a/qemu-img.c b/qemu-img.c
index 0ae543c..29865b5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -628,7 +628,9 @@ static int img_convert(int argc, char **argv)
int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
int progress = 0, flags;
const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
+ char image_filename[1024], backing_filename[1024];
BlockDriver *drv, *proto_drv;
+ BlockDriver *image_drv, *backing_drv;
BlockDriverState **bs = NULL, *out_bs = NULL;
int64_t total_sectors, nb_sectors, sector_num, bs_offset;
uint64_t bs_sectors;
@@ -637,6 +639,7 @@ static int img_convert(int argc, char **argv)
BlockDriverInfo bdi;
QEMUOptionParameter *param = NULL, *create_options = NULL;
QEMUOptionParameter *out_baseimg_param;
+ QEMUOptionParameter *image_param = NULL;
char *options = NULL;
const char *snapshot_name = NULL;
float local_progress;
@@ -797,6 +800,7 @@ static int img_convert(int argc, char **argv)
out_baseimg = out_baseimg_param->value.s;
}
+ image_param = get_option_parameter(param, BLOCK_OPT_SIZE);
/* Check if compression is supported */
if (compress) {
QEMUOptionParameter *encryption =
@@ -828,6 +832,41 @@ static int img_convert(int argc, char **argv)
}
/* Create the new image */
+
+ if (0 == strcmp(out_fmt, "add-cow")) {
+ image_drv = bdrv_find_format("raw");
+ if (!drv) {
+ ret = -1;
+ goto out;
+ }
+ snprintf(image_filename, sizeof(image_filename),
+ "%s"".ct.raw", out_filename);
+ ret = bdrv_create(image_drv, image_filename, image_param);
+ if (ret < 0) {
+ error_report("%s: error while creating image_file: %s",
+ image_filename, strerror(-ret));
+ goto out;
+ }
+ set_option_parameter(param, BLOCK_OPT_IMAGE_FILE, image_filename);
+
+ if (!out_baseimg) {
+ backing_drv = bdrv_find_format("qcow2");
+ if (!drv) {
+ ret = -1;
+ goto out;
+ }
+ snprintf(backing_filename, sizeof(backing_filename),
+ "%s"".ct.qcow2", out_filename);
+ ret = bdrv_create(backing_drv, backing_filename, image_param);
+ if (ret < 0) {
+ error_report("%s: error while creating backing_file: %s",
+ backing_filename, strerror(-ret));
+ goto out;
+ }
+ set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
+ backing_filename);
+ }
+ }
ret = bdrv_create(drv, out_filename, param);
if (ret < 0) {
if (ret == -ENOTSUP) {
--
1.7.9.5
- [Qemu-devel] [PATCH 1/3 v9] add-cow file format,
Dong Xu Wang <=