This patch creates intermediate buffer for reading from block driver
state and performs read-ahead into this buffer. Snapshot code performs
reads sequentially, so we know in advance which offsets will be required
and when they will no longer be needed.
Results are fantastic. Switch to snapshot times of 2GB Fedora 31 VM
over NVME storage are the following:
original fixed
cached: 1.84s 1.16s
non-cached: 12.74s 1.27s
The difference over HDD would be more significant :)
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <fam@euphon.net>
CC: Juan Quintela <quintela@redhat.com>
CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
CC: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
block/io.c | 225 +++++++++++++++++++++++++++++++++++++-
include/block/block_int.h | 3 +
2 files changed, 225 insertions(+), 3 deletions(-)
diff --git a/block/io.c b/block/io.c
index 71a696deb7..bb06f750d8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2739,6 +2739,180 @@ static int bdrv_co_do_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
     }
 }
+
+/*
+ * One read-ahead buffer holding vmstate bytes [offset, offset + bytes).
+ * Chunks live on either the 'loading' or the 'chunks' list of
+ * BdrvLoadVMState, linked through 'list'.
+ */
+typedef struct BdrvLoadVMChunk {
+    void *buf;          /* qemu_blockalign()-ed data buffer */
+    uint64_t offset;    /* start offset of this chunk in the vmstate area */
+    ssize_t bytes;      /* size of the buffer */
+
+    QLIST_ENTRY(BdrvLoadVMChunk) list;
+} BdrvLoadVMChunk;
+
+/*
+ * Per-BlockDriverState read-ahead state for sequential vmstate loading.
+ * All fields below 'pool' are protected by 'lock'.
+ */
+typedef struct BdrvLoadVMState {
+    AioTaskPool *pool;
+
+    /* presumably the caller's current sequential read position — not
+     * referenced in this hunk, confirm against the rest of the patch */
+    int64_t offset;
+    int64_t last_loaded;    /* offset up to which read-ahead was scheduled */
+
+    int chunk_count;        /* number of chunks currently allocated */
+    QLIST_HEAD(, BdrvLoadVMChunk) chunks;   /* chunks with completed reads */
+    QLIST_HEAD(, BdrvLoadVMChunk) loading;  /* chunks with reads in flight */
+    CoMutex lock;
+    CoQueue waiters;        /* coroutines waiting for a chunk to complete */
+} BdrvLoadVMState;
+
+/* AioTask wrapper: one background read filling a single chunk of 'bs'. */
+typedef struct BdrvLoadVMStateTask {
+    AioTask task;
+
+    BlockDriverState *bs;
+    BdrvLoadVMChunk *chunk;
+} BdrvLoadVMStateTask;
+
+/*
+ * Walk the chunk list starting at 'c' and return the chunk whose range
+ * [offset, offset + bytes) covers 'pos', or NULL if no chunk does.
+ */
+static BdrvLoadVMChunk *bdrv_co_find_loadvmstate_chunk(int64_t pos,
+                                                       BdrvLoadVMChunk *c)
+{
+    for (; c != NULL; c = QLIST_NEXT(c, list)) {
+        if (c->offset <= pos && c->offset + c->bytes > pos) {
+            return c;
+        }
+    }
+
+    return NULL;
+}
+
+/* Release a chunk's aligned data buffer and the chunk itself. */
+static void bdrv_free_loadvm_chunk(BdrvLoadVMChunk *c)
+{
+    qemu_vfree(c->buf);
+    g_free(c);
+}
+
+/*
+ * AioTask entry point: read one chunk from the driver via
+ * ->bdrv_load_vmstate().  On success the chunk moves from the 'loading'
+ * list to 'chunks'; on error it is freed.  Either way, coroutines queued
+ * on 'waiters' are woken so they can re-check for their data.
+ */
+static coroutine_fn int bdrv_co_vmstate_load_task_entry(AioTask *task)
+{
+    int err = 0;
+    BdrvLoadVMStateTask *t = container_of(task, BdrvLoadVMStateTask, task);
+    BdrvLoadVMChunk *c = t->chunk;
+    BdrvLoadVMState *state = t->bs->loadvm_state;
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, c->buf, c->bytes);
+
+    /* Keep the BDS busy for the duration of the driver read. */
+    bdrv_inc_in_flight(t->bs);
+    err = t->bs->drv->bdrv_load_vmstate(t->bs, &qiov, c->offset);
+    bdrv_dec_in_flight(t->bs);
+
+    qemu_co_mutex_lock(&state->lock);
+    QLIST_REMOVE(c, list);
+    if (err == 0) {
+        QLIST_INSERT_HEAD(&state->chunks, c, list);
+    } else {
+        bdrv_free_loadvm_chunk(c);
+    }
+    qemu_co_mutex_unlock(&state->lock);
+    /* NOTE(review): restart is done outside 'lock' — looks intentional so
+     * woken readers can take the mutex immediately; confirm CoQueue usage
+     * here does not require the lock to be held. */
+    qemu_co_queue_restart_all(&state->waiters);
+
+    return err;
+}
+
+static void bdrv_co_start_loadvmstate(BlockDriverState *bs,
+ BdrvLoadVMState *state)
+{
+ int i;
+ size_t buf_size = MAX(bdrv_get_cluster_size(bs), 1 * MiB);
+
+ qemu_co_mutex_assert_locked(&state->lock);
+ for (i = state->chunk_count; i < BDRV_VMSTATE_WORKERS_MAX; i++) {
+ BdrvLoadVMStateTask *t = g_new(BdrvLoadVMStateTask, 1);
+
+ *t = (BdrvLoadVMStateTask) {
+ .task.func = bdrv_co_vmstate_load_task_entry,
+ .bs = bs,
+ .chunk = g_new(BdrvLoadVMChunk, 1),
+ };
+
+ *t->chunk = (BdrvLoadVMChunk) {
+ .buf = qemu_blockalign(bs, buf_size),
+ .offset = state->last_loaded,
+ .bytes = buf_size,
+ };
+ /* FIXME: tail of stream */
+
+ QLIST_INSERT_HEAD(&state->loading, t->chunk, list);
+ state->chunk_count++;
+ state->last_loaded += buf_size;
+
+ qemu_co_mutex_unlock(&state->lock);
+ aio_task_pool_start_task(state->pool, &t->task);