[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 07/26] FVD: extend FVD header fvd.h to be more complete
From: |
Chunqiang Tang |
Subject: |
[Qemu-devel] [PATCH 07/26] FVD: extend FVD header fvd.h to be more complete |
Date: |
Fri, 25 Feb 2011 17:37:47 -0500 |
This patch is part of the Fast Virtual Disk (FVD) proposal.
See http://wiki.qemu.org/Features/FVD.
This patch makes FVD's header file fvd.h more complete, by adding type
definition for BDRVFvdState, FvdAIOCB, etc.
Signed-off-by: Chunqiang Tang <address@hidden>
---
block/fvd.h | 337 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 337 insertions(+), 0 deletions(-)
diff --git a/block/fvd.h b/block/fvd.h
index f2da330..b83b7aa 100644
--- a/block/fvd.h
+++ b/block/fvd.h
@@ -168,4 +168,341 @@ typedef struct __attribute__ ((__packed__)) FvdHeader {
} FvdHeader;
typedef struct BDRVFvdState {
+ BlockDriverState *fvd_metadata;
+ BlockDriverState *fvd_data;
+ uint64_t virtual_disk_size; /* in bytes. */
+ uint64_t bitmap_offset; /* in sectors. */
+ uint64_t bitmap_size; /* in bytes. */
+ uint64_t data_offset; /* in sectors. Begin of real data. */
+ uint64_t base_img_sectors;
+ uint64_t block_size; /* in sectors. */
+ bool copy_on_read;
+ uint64_t max_outstanding_copy_on_read_data; /* in bytes. */
+ uint64_t outstanding_copy_on_read_data; /* in bytes. */
+ bool data_region_prepared;
+ QLIST_HEAD(WriteLocks, FvdAIOCB) write_locks; /* All writes. */
+ QLIST_HEAD(CopyLocks, FvdAIOCB) copy_locks; /* copy-on-read and CoW. */
+
+ /* Keep two copies of bitmap to reduce the overhead of updating the
+ * on-disk bitmap, i.e., copy-on-read and prefetching do not update the
+ * on-disk bitmap. See Section 3.3.4 of the FVD-cow paper. */
+ uint8_t *fresh_bitmap;
+ uint8_t *stale_bitmap;
+
+ /******** Begin: for compact image. *************************************/
+ uint32_t *table; /* Mapping table stored in memory in little endian. */
+ uint64_t table_size; /* in bytes. */
+ uint64_t used_storage; /* in sectors. */
+ uint64_t avail_storage; /* in sectors. */
+ uint64_t chunk_size; /* in sectors. */
+ uint64_t storage_grow_unit; /* in sectors. */
+ uint64_t table_offset; /* in sectors. */
+ char *add_storage_cmd;
+ uint32_t *leaked_chunks;
+ uint32_t num_leaked_chunks;
+ uint32_t next_avail_leaked_chunk;
+ uint32_t chunks_relocated; /* Affect bdrv_has_zero_init(). */
+ /******** End: for compact image. ***************************************/
+
+ /******** Begin: for journal. *******************************************/
+ uint64_t journal_offset; /* in sectors. */
+ uint64_t journal_size; /* in sectors. */
+ uint64_t journal_epoch;
+ uint64_t next_journal_sector; /* in sectors. */
+ bool dirty_image;
+ bool metadata_err_prohibit_write;
+
+ /* There are two different ways of writing metadata changes to the
+ * journal. If cache=writethrough, metadata changes are written to the
+ * journal immediately. If (cache!=writethrough||IN_QEMU_TOOL), metadata
+ * changes are buffered in memory (bjnl.buf below), and later
+ * written to the journal either triggered by bdrv_aio_flush() or by a
+ * timeout (bjnl.clean_buf_timer below). */
+ bool use_bjnl; /* 'bjnl' stands for buffered journal update. */
+ union {
+ /* 'ujnl' stands for unbuffered journal update. */
+ struct {
+ int active_writes;
+ /* Journal writes waiting for journal recycle to finish.
+ * See JournalCB.ujnl_next_wait4_recycle. */
+ QLIST_HEAD(JournalRecycle, FvdAIOCB) wait4_recycle;
+ } ujnl;
+
+ /* 'bjnl' stands for buffered journal update. */
+ struct {
+ uint8_t *buf;
+ size_t buf_size;
+ size_t def_buf_size;
+ size_t buf_used;
+ bool buf_contains_bitmap_update;
+ QEMUTimer *clean_buf_timer;
+ bool timer_scheduled;
+ uint64_t clean_buf_period;
+ /* See JournalCB.bjnl_next_queued_buf. */
+ QTAILQ_HEAD(CleanBuf, FvdAIOCB) queued_bufs;
+ } bjnl;
+ };
+ /******** End: for journal. ********************************************/
+
+ /******** Begin: for prefetching. ***********************************/
+ struct FvdAIOCB **prefetch_acb;
+ int prefetch_state; /* PREFETCH_STATE_RUNNING, FINISHED, or DISABLED. */
+ int num_prefetch_slots;
+ int num_filled_prefetch_slots;
+ int next_prefetch_read_slot;
+ bool prefetch_read_active;
+ bool pause_prefetch_requested;
+ int64_t prefetch_start_delay; /* in seconds. */
+ uint64_t unclaimed_prefetch_region_start;
+ uint64_t prefetch_read_time; /* in milliseconds. */
+ uint64_t prefetch_write_time; /* in milliseconds. */
+ uint64_t prefetch_data_read; /* in bytes. */
+ uint64_t prefetch_data_written; /* in bytes. */
+ double prefetch_read_throughput; /* in bytes/millisecond. */
+ double prefetch_write_throughput; /* in bytes/millisecond. */
+ double prefetch_min_read_throughput; /* in bytes/millisecond. */
+ double prefetch_min_write_throughput; /* in bytes/millisecond. */
+ uint64_t prefetch_read_throughput_measure_time; /* in milliseconds. */
+ uint64_t prefetch_write_throughput_measure_time; /* in milliseconds. */
+ uint64_t prefetch_throttle_time; /* in milliseconds. */
+ uint64_t sectors_per_prefetch;
+ QEMUTimer *prefetch_timer;
+ /******** End: for prefetching. ***********************************/
+
+#ifdef FVD_DEBUG
+ int64_t total_copy_on_read_data; /* in bytes. */
+ int64_t total_prefetch_data; /* in bytes. */
+#endif
} BDRVFvdState;
+
+/* Begin of data type definitions. */
+struct FvdAIOCB;
+
+typedef struct JournalCB { /* Embedded in FvdAIOCB as jcb. */
+ BlockDriverAIOCB *hd_acb;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ bool bitmap_updated;
+ union { /* The two list links below share storage. */
+ QLIST_ENTRY(FvdAIOCB) ujnl_next_wait4_recycle; /* See BDRVFvdState.ujnl.wait4_recycle. */
+ QTAILQ_ENTRY(FvdAIOCB) bjnl_next_queued_buf; /* See BDRVFvdState.bjnl.queued_bufs. */
+ };
+} JournalCB;
+
+/* CopyLock is used by AIOWriteCB and AIOCopyCB. It is embedded in FvdAIOCB
+ * as copy_lock. */
+typedef struct CopyLock {
+ QLIST_ENTRY(FvdAIOCB) next; /* presumably the link for BDRVFvdState.copy_locks; confirm */
+ int64_t begin;
+ int64_t end;
+ QLIST_HEAD(DependentWritesHead, FvdAIOCB) dependent_writes; /* See AIOWriteCB.next_dependent_write. */
+} CopyLock;
+
+typedef struct ChildAIOReadCB { /* Used for AIOReadCB.read_backing and AIOReadCB.read_fvd. */
+ BlockDriverAIOCB *hd_acb;
+ struct iovec iov;
+ QEMUIOVector qiov;
+ int64_t sector_num;
+ int nb_sectors;
+ int done;
+} ChildAIOReadCB;
+
+typedef struct AIOReadCB { /* Member 'read' of the union in FvdAIOCB. */
+ QEMUIOVector *qiov;
+ int ret;
+ ChildAIOReadCB read_backing;
+ ChildAIOReadCB read_fvd;
+} AIOReadCB;
+
+/* For copy-on-read and prefetching. Member 'copy' of the union in FvdAIOCB. */
+typedef struct AIOCopyCB {
+ BlockDriverAIOCB *hd_acb;
+ struct iovec iov;
+ QEMUIOVector qiov;
+ uint8_t *buf;
+ int64_t buffered_sector_begin;
+ int64_t buffered_sector_end;
+ int64_t last_prefetch_op_start_time; /* For prefetch only. */
+} AIOCopyCB;
+
+typedef struct AIOWriteCB { /* Member 'write' of the union in FvdAIOCB. */
+ BlockDriverAIOCB *hd_acb;
+ QEMUIOVector *qiov;
+ uint8_t *cow_buf;
+ QEMUIOVector *cow_qiov;
+ int64_t cow_start_sector;
+ int ret;
+ union { /* The two flags below share storage. */
+ bool update_table;
+ bool update_bitmap;
+ };
+
+ /* See BDRVFvdState.write_locks */
+ QLIST_ENTRY(FvdAIOCB) next_write_lock;
+
+ /* See CopyLock.dependent_writes (reached via FvdAIOCB.copy_lock). */
+ QLIST_ENTRY(FvdAIOCB) next_dependent_write;
+} AIOWriteCB;
+
+/* For AIOStoreCompactCB and AIOLoadCompactCB, which each hold one inline
+ * instance (one_child) and a pointer to further instances (children). */
+typedef struct CompactChildCB {
+ struct FvdAIOCB *acb;
+ BlockDriverAIOCB *hd_acb;
+} CompactChildCB;
+
+/* For storing data to a compact image. Member 'store' of the union in
+ * FvdAIOCB. */
+typedef struct AIOStoreCompactCB {
+ CompactChildCB one_child;
+ CompactChildCB *children;
+ int update_table;
+ int num_children;
+ int finished_children;
+ struct FvdAIOCB *parent_acb;
+ int ret;
+ int soft_write; /* true if the store is caused by copy-on-read or prefetch. */
+ QEMUIOVector *orig_qiov;
+} AIOStoreCompactCB;
+
+/* For loading data from a compact image. Member 'load' of the union in
+ * FvdAIOCB. */
+typedef struct AIOLoadCompactCB {
+ CompactChildCB *children;
+ CompactChildCB one_child;
+ int num_children;
+ int finished_children;
+ struct FvdAIOCB *parent_acb;
+ int ret;
+ QEMUIOVector *orig_qiov;
+} AIOLoadCompactCB;
+
+typedef struct AIOFlushCB { /* Member 'flush' of the union in FvdAIOCB. */
+ BlockDriverAIOCB *data_acb;
+ BlockDriverAIOCB *metadata_acb;
+ int num_finished;
+ int ret;
+} AIOFlushCB;
+
+typedef struct AIOCleanJournalBufCB { /* NOTE(review): not a member of the FvdAIOCB union in this header; confirm where it is used. */
+ uint8_t *buf;
+} AIOCleanJournalBufCB;
+
+typedef struct AIOWrapperCB { /* Member 'wrapper' of the union in FvdAIOCB. */
+ QEMUBH *bh;
+} AIOWrapperCB;
+
+typedef enum { OP_READ = 1, OP_WRITE, OP_COPY, OP_STORE_COMPACT,
+ OP_LOAD_COMPACT, OP_WRAPPER, OP_FLUSH, OP_BJNL_BUF_WRITE, OP_BJNL_FLUSH
+} op_type; /* Request kind, stored in FvdAIOCB.type. Values start at 1. */
+
+#ifdef FVD_DEBUG
+/* For debugging memory leak. Embedded in FvdAIOCB as tracer. */
+typedef struct alloc_tracer_t {
+ int64_t magic;
+ int alloc_tracer;
+ const char *alloc_file;
+ int alloc_line;
+ size_t size;
+} alloc_tracer_t;
+#endif
+
+typedef struct FvdAIOCB {
+ BlockDriverAIOCB common;
+ op_type type; /* presumably indicates which union member below is in use; confirm */
+ int64_t sector_num;
+ int nb_sectors;
+ JournalCB jcb; /* For AIOWriteCB and AIOStoreCompactCB. */
+ CopyLock copy_lock; /* For AIOWriteCB and AIOCopyCB. */
+ bool cancel_in_progress;
+
+ /* Use a union so that all requests can efficiently share one big
+ * AIOPool. */
+ union {
+ AIOWrapperCB wrapper;
+ AIOReadCB read;
+ AIOWriteCB write;
+ AIOCopyCB copy;
+ AIOLoadCompactCB load;
+ AIOStoreCompactCB store;
+ AIOFlushCB flush;
+ };
+
+#ifdef FVD_DEBUG
+ int64_t magic;
+ alloc_tracer_t tracer; /* For debugging memory leak. */
+ /* Uniquely identifies a request across all processing activities. */
+ unsigned long long int uuid;
+#endif
+} FvdAIOCB;
+
+static BlockDriver bdrv_fvd;
+static QEMUOptionParameter fvd_create_options[]; /* NOTE(review): static array declared with incomplete type; ISO C 6.9.2p3 disallows this for tentative definitions with internal linkage, though GCC accepts it. */
+static QEMUOptionParameter fvd_update_options[]; /* NOTE(review): same incomplete-type concern as fvd_create_options. */
+
+/* Function prototypes. All are declared static, so each translation unit
+ * that includes this header must provide its own definitions. */
+static int fvd_create(const char *filename, QEMUOptionParameter * options);
+static int fvd_probe(const uint8_t * buf, int buf_size, const char *filename);
+static int fvd_open(BlockDriverState * bs, const char *filename, int flags);
+static void fvd_close(BlockDriverState * bs);
+static int fvd_is_allocated(BlockDriverState * bs, int64_t sector_num,
+ int nb_sectors, int *pnum);
+static int fvd_flush(BlockDriverState * bs);
+static BlockDriverAIOCB *fvd_aio_readv(BlockDriverState * bs,
+ int64_t sector_num, QEMUIOVector * qiov, int nb_sectors,
+ BlockDriverCompletionFunc * cb, void *opaque);
+static BlockDriverAIOCB *fvd_aio_writev(BlockDriverState * bs,
+ int64_t sector_num, QEMUIOVector * qiov, int nb_sectors,
+ BlockDriverCompletionFunc * cb, void *opaque);
+static BlockDriverAIOCB *fvd_aio_flush(BlockDriverState * bs,
+ BlockDriverCompletionFunc * cb, void *opaque);
+static int fvd_get_info(BlockDriverState * bs, BlockDriverInfo * bdi);
+static int fvd_update (BlockDriverState * bs, QEMUOptionParameter * options);
+static int fvd_has_zero_init(BlockDriverState * bs);
+
+/* Default configurations.
+ * NOTE(review): the throughput defaults below are annotated in KB/s, while
+ * the BDRVFvdState prefetch_*_throughput fields are annotated in
+ * bytes/millisecond; presumably a conversion happens where the defaults are
+ * applied. Confirm. */
+#define BYTES_PER_PREFETCH 1048576 /* bytes (1 MiB) */
+#define PREFETCH_THROTTLING_TIME 30000 /* milliseconds */
+#define NUM_PREFETCH_SLOTS 2
+#define PREFETCH_MIN_MEASURE_READ_TIME 100 /* milliseconds */
+#define PREFETCH_MIN_MEASURE_WRITE_TIME 100 /* milliseconds */
+#define PREFETCH_MIN_READ_THROUGHPUT 5120 /* KB/s */
+#define PREFETCH_MIN_WRITE_THROUGHPUT 5120 /* KB/s */
+#define PREFETCH_MAX_READ_THROUGHPUT 1000000000L /* KB/s */
+#define PREFETCH_MAX_WRITE_THROUGHPUT 1000000000L /* KB/s */
+#define PREFETCH_PERF_CALC_ALPHA 0.8
+#define MAX_OUTSTANDING_COPY_ON_READ_DATA 2000000 /* bytes */
+#define MODERATE_BITMAP_SIZE 4194304L /* bytes (4 MiB) */
+#define CHUNK_SIZE 1048576LL /* bytes (1 MiB) */
+#define JOURNAL_SIZE 16777216LL /* bytes (16 MiB) */
+#define STORAGE_GROW_UNIT 104857600LL /* bytes (100 MiB) */
+#define JOURNAL_BUF_SIZE (64*1024) /* bytes */
+#define JOURNAL_CLEAN_BUF_PERIOD 5000 /* milliseconds */
+
+/* State of BDRVFvdState.prefetch_state. */
+#define PREFETCH_STATE_RUNNING 1
+#define PREFETCH_STATE_FINISHED 2
+#define PREFETCH_STATE_DISABLED 3
+
+/* For convenience.
+ * The table macros apply le32_to_cpu()/cpu_to_le32() themselves, so they
+ * take entries in their stored little-endian form. DIRTY_TABLE is the top
+ * bit of an entry and EMPTY_TABLE is the value IS_EMPTY() tests for.
+ * READ_TABLE() on an EMPTY_TABLE entry yields 0x7FFFFFFF, whereas
+ * READ_TABLE2() returns EMPTY_TABLE unchanged in that case.
+ * Several macros (ROUND_UP, ROUND_DOWN, READ_TABLE2, CLEAN_DIRTY,
+ * CLEAN_DIRTY2) expand an argument more than once, so avoid arguments with
+ * side effects. */
+#define IN_QEMU_TOOL (rt_clock == NULL) /* a trick */
+#define ROUND_UP(x, base) ((((x)+(base)-1) / (base)) * (base))
+#define ROUND_DOWN(x, base) ((((x) / (base)) * (base)))
+#define BOOL(x) ((x) ? "true" : "false")
+#define EMPTY_TABLE ((uint32_t)0xFFFFFFFF)
+#define DIRTY_TABLE ((uint32_t)0x80000000)
+#define READ_TABLE(entry) (le32_to_cpu(entry) & ~DIRTY_TABLE)
+# define FVDAIOCB_MAGIC ((uint64_t)0x3A8FCE89325B976DULL)
+# define FVD_ALLOC_MAGIC ((uint64_t)0x4A7dCEF9925B976DULL)
+#define IS_EMPTY(entry) ((entry) == EMPTY_TABLE)
+#define IS_DIRTY(entry) (le32_to_cpu(entry) & DIRTY_TABLE)
+#define WRITE_TABLE(entry,id) ((entry) = cpu_to_le32(id))
+#define READ_TABLE2(entry) \
+ ((entry)==EMPTY_TABLE ? EMPTY_TABLE : (le32_to_cpu(entry) & ~DIRTY_TABLE))
+/* CLEAN_DIRTY: clear the DIRTY_TABLE bit of entry in place; an empty entry is left untouched. NOTE(review): entry is not parenthesized in the assignment. */
+#define CLEAN_DIRTY(entry) \
+ do { \
+ if (!IS_EMPTY(entry)) \
+ entry = cpu_to_le32(le32_to_cpu(entry) & ~DIRTY_TABLE); \
+ } while (0)
+/* CLEAN_DIRTY2: like CLEAN_DIRTY, but uses ASSERT to require a non-empty entry instead of skipping it. */
+#define CLEAN_DIRTY2(entry) \
+ do { \
+ ASSERT(!IS_EMPTY(entry)); \
+ entry = cpu_to_le32(le32_to_cpu(entry) & ~DIRTY_TABLE); \
+ } while (0)
--
1.7.0.4
- [Qemu-devel] [PATCH 23/26] FVD: add impl of interface bdrv_is_allocated(), (continued)
- [Qemu-devel] [PATCH 23/26] FVD: add impl of interface bdrv_is_allocated(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 17/26] FVD: add impl of bdrv_flush() and bdrv_aio_flush(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 22/26] FVD: add impl of interface bdrv_update(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 13/26] FVD: add impl of storing data in compact image, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 19/26] FVD: add support for aio_cancel, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 25/26] FVD: add impl of interface bdrv_probe(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 15/26] FVD: add basic journal functionality, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 18/26] FVD: add support for base image prefetching, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 03/26] FVD: add fully automated test-qcow2.sh, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 02/26] FVD: extend qemu-io to do fully automated testing, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 07/26] FVD: extend FVD header fvd.h to be more complete,
Chunqiang Tang <=
- [Qemu-devel] [PATCH 12/26] FVD: add impl of interface bdrv_aio_readv(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 11/26] FVD: add impl of interface bdrv_aio_writev(), Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 06/26] FVD: skeleton of Fast Virtual Disk, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 05/26] FVD: add the 'qemu-img update' command, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 04/26] FVD: add fully automated test-vdi.sh, Chunqiang Tang, 2011/02/25
- [Qemu-devel] [PATCH 09/26] FVD: add impl of interface bdrv_create(), Chunqiang Tang, 2011/02/25