Re: [PATCH 2/2] qcow2: improve savevm performance - please ignore
From: Denis V. Lunev
Subject: Re: [PATCH 2/2] qcow2: improve savevm performance - please ignore
Date: Wed, 10 Jun 2020 22:01:46 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.8.0
On 6/10/20 9:58 PM, Denis V. Lunev wrote:
> This patch does two basic things:
> - it creates an intermediate buffer for all writes from the QEMU migration
>   code to the QCOW2 image,
> - this buffer is sent to disk asynchronously, allowing several writes to
>   run in parallel.
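
The idea in isolation, as a minimal self-contained sketch in plain POSIX C
(the WriteBuffer/wb_append/wb_flush names and the synchronous pwrite() are
illustrative only, not QEMU APIs): small incoming pieces are copied into one
large staging buffer and written out in a single I/O once it fills.  The
patch below does the same accumulation, but hands each full buffer to an
AioTaskPool worker instead of writing synchronously, so several such writes
can be in flight at once.

#include <stdint.h>
#include <string.h>
#include <unistd.h>

#define CHUNK_SIZE (1024 * 1024)    /* 1 MiB, mirroring MAX(cluster_size, 1 MiB) */

typedef struct WriteBuffer {
    int fd;            /* destination file descriptor */
    int64_t offset;    /* file offset of the first buffered byte */
    size_t bytes;      /* bytes accumulated so far */
    uint8_t *buf;      /* CHUNK_SIZE staging buffer */
} WriteBuffer;

/* Write out whatever has accumulated and advance the window. */
static int wb_flush(WriteBuffer *wb)
{
    if (wb->bytes == 0) {
        return 0;
    }
    if (pwrite(wb->fd, wb->buf, wb->bytes, wb->offset) != (ssize_t)wb->bytes) {
        return -1;
    }
    wb->offset += wb->bytes;
    wb->bytes = 0;
    return 0;
}

/*
 * Copy a (possibly tiny) piece of data into the staging buffer;
 * issue one big sequential write every time the buffer fills up.
 */
static int wb_append(WriteBuffer *wb, const void *data, size_t len)
{
    const uint8_t *p = data;

    while (len > 0) {
        size_t to_copy = CHUNK_SIZE - wb->bytes;

        if (to_copy > len) {
            to_copy = len;
        }
        memcpy(wb->buf + wb->bytes, p, to_copy);
        wb->bytes += to_copy;
        p += to_copy;
        len -= to_copy;

        if (wb->bytes == CHUNK_SIZE && wb_flush(wb) < 0) {
            return -1;
        }
    }
    return 0;
}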
>
> In general, migration code is fantastically inefficient (by observation):
> buffers are not aligned and are sent in arbitrary pieces, often less than
> 100 bytes at a time, which results in read-modify-write cycles when the
> image is opened without the host page cache. It should also be noted that
> all writes land in previously unallocated image clusters, which suffer
> further from such partial writes.
>
> Snapshot creation time (2 GB Fedora-31 VM running over NVME storage):
>              original    fixed
> cached:         1.79s    1.27s
> non-cached:     3.29s    0.81s
>
> The difference over HDD would be more significant :)
>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Max Reitz <mreitz@redhat.com>
> CC: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> CC: Denis Plotnikov <dplotnikov@virtuozzo.com>
> ---
> block/qcow2.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++-
> block/qcow2.h | 4 ++
> 2 files changed, 113 insertions(+), 2 deletions(-)
>
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 0cd2e6757e..e6232f32e2 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -4797,11 +4797,43 @@ static int qcow2_make_empty(BlockDriverState *bs)
> return ret;
> }
>
> +
> +typedef struct Qcow2VMStateTask {
> + AioTask task;
> +
> + BlockDriverState *bs;
> + int64_t offset;
> + void *buf;
> + size_t bytes;
> +} Qcow2VMStateTask;
> +
> +typedef struct Qcow2SaveVMState {
> + AioTaskPool *pool;
> + Qcow2VMStateTask *t;
> +} Qcow2SaveVMState;
> +
> static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
> {
> BDRVQcow2State *s = bs->opaque;
> + Qcow2SaveVMState *state = s->savevm_state;
> int ret;
>
> + if (state != NULL) {
> + aio_task_pool_start_task(state->pool, &state->t->task);
> +
> + aio_task_pool_wait_all(state->pool);
> + ret = aio_task_pool_status(state->pool);
> +
> + aio_task_pool_free(state->pool);
> + g_free(state);
> +
> + s->savevm_state = NULL;
> +
> + if (ret < 0) {
> + return ret;
> + }
> + }
> +
> qemu_co_mutex_lock(&s->lock);
> ret = qcow2_write_caches(bs);
> qemu_co_mutex_unlock(&s->lock);
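
This flush hook is what guarantees the buffered tail is not lost: the
partially filled task is submitted, the whole pool is drained, and only then
are the qcow2 metadata caches written, so a completed snapshot has all of its
vmstate data on disk (or an error is propagated).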
> @@ -5098,14 +5130,89 @@ static int qcow2_has_zero_init(BlockDriverState *bs)
> }
> }
>
> +
> +static coroutine_fn int qcow2_co_vmstate_task_entry(AioTask *task)
> +{
> + int err = 0;
> + Qcow2VMStateTask *t = container_of(task, Qcow2VMStateTask, task);
> +
> + if (t->bytes != 0) {
> + QEMUIOVector local_qiov;
> + qemu_iovec_init_buf(&local_qiov, t->buf, t->bytes);
> + err = t->bs->drv->bdrv_co_pwritev_part(t->bs, t->offset, t->bytes,
> + &local_qiov, 0, 0);
> + }
> +
> + qemu_vfree(t->buf);
> + return err;
> +}
> +
> +static Qcow2VMStateTask *qcow2_vmstate_task_create(BlockDriverState *bs,
> + int64_t pos, size_t size)
> +{
> + BDRVQcow2State *s = bs->opaque;
> + Qcow2VMStateTask *t = g_new(Qcow2VMStateTask, 1);
> +
> + *t = (Qcow2VMStateTask) {
> + .task.func = qcow2_co_vmstate_task_entry,
> + .buf = qemu_blockalign(bs, size),
> + .offset = qcow2_vm_state_offset(s) + pos,
> + .bs = bs,
> + };
> +
> + return t;
> +}
> +
> static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
> int64_t pos)
> {
> BDRVQcow2State *s = bs->opaque;
> + Qcow2SaveVMState *state = s->savevm_state;
> + Qcow2VMStateTask *t;
> + size_t buf_size = MAX(s->cluster_size, 1 * MiB);
> + size_t to_copy;
> + size_t off;
>
> BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
> - return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
> - qiov->size, qiov, 0, 0);
> +
> + if (state == NULL) {
> + state = g_new(Qcow2SaveVMState, 1);
> + *state = (Qcow2SaveVMState) {
> + .pool = aio_task_pool_new(QCOW2_MAX_WORKERS),
> + .t = qcow2_vmstate_task_create(bs, pos, buf_size),
> + };
> +
> + s->savevm_state = state;
> + }
> +
> + if (aio_task_pool_status(state->pool) != 0) {
> + return aio_task_pool_status(state->pool);
> + }
> +
> + t = state->t;
> + if (t->offset + t->bytes != qcow2_vm_state_offset(s) + pos) {
> + /* Normally this branch is not reachable from migration */
> + return bs->drv->bdrv_co_pwritev_part(bs,
> + qcow2_vm_state_offset(s) + pos, qiov->size, qiov, 0, 0);
> + }
> +
> + off = 0;
> + while (1) {
> + to_copy = MIN(qiov->size - off, buf_size - t->bytes);
> + qemu_iovec_to_buf(qiov, off, t->buf + t->bytes, to_copy);
> + t->bytes += to_copy;
> + if (t->bytes < buf_size) {
> + return 0;
> + }
> +
> + aio_task_pool_start_task(state->pool, &t->task);
> +
> + pos += to_copy;
> + off += to_copy;
> + state->t = t = qcow2_vmstate_task_create(bs, pos, buf_size);
> + }
> +
> + return 0;
> }
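
To put numbers on the fast path: with the default 64 KiB cluster size,
buf_size = MAX(64 KiB, 1 MiB) = 1 MiB, so a typical sub-100-byte chunk from
the migration stream is just memcpy()ed into the current task's buffer and
the function returns immediately. Only when a full 1 MiB has accumulated is
the task handed to the pool and a fresh task allocated for the next window;
writes at a non-contiguous offset bypass the buffer entirely via the old
bdrv_co_pwritev_part() path.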
>
> static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 7ce2c23bdb..146cfed739 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -291,6 +291,8 @@ typedef struct Qcow2BitmapHeaderExt {
>
> #define QCOW2_MAX_THREADS 4
>
> +typedef struct Qcow2SaveVMState Qcow2SaveVMState;
> +
> typedef struct BDRVQcow2State {
> int cluster_bits;
> int cluster_size;
> @@ -384,6 +386,8 @@ typedef struct BDRVQcow2State {
> * is to convert the image with the desired compression type set.
> */
> Qcow2CompressionType compression_type;
> +
> + Qcow2SaveVMState *savevm_state;
> } BDRVQcow2State;
>
> typedef struct Qcow2COWRegion {
- please ignore