qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [patch 6/7] QEMU live block copy


From: Blue Swirl
Subject: Re: [Qemu-devel] [patch 6/7] QEMU live block copy
Date: Tue, 24 May 2011 22:15:09 +0300

On Tue, May 24, 2011 at 12:31 AM, Marcelo Tosatti <address@hidden> wrote:
> Support live image copy + switch. That is, copy an image backing
> a guest hard disk to a destination image (destination image must
> be created separately), and switch to this copy.
>
> Command syntax:
>
> block_copy device filename [-i] -- live block copy device to image
>             -i for incremental copy (base image shared between src and destination)
>
> Please refer to qmp-commands diff for more details.
>
> Signed-off-by: Marcelo Tosatti <address@hidden>
>
> Index: qemu-block-copy/block-copy.c
> ===================================================================
> --- /dev/null
> +++ qemu-block-copy/block-copy.c
> @@ -0,0 +1,754 @@
> +/*
> + * QEMU live block copy
> + *
> + * Copyright (C) 2010 Red Hat Inc.
> + *
> + * Authors: Marcelo Tosatti <address@hidden>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include "block_int.h"
> +#include "blockdev.h"
> +#include "qemu-queue.h"
> +#include "qemu-timer.h"
> +#include "monitor.h"
> +#include "block-copy.h"
> +#include "migration.h"
> +#include "sysemu.h"
> +#include "qjson.h"
> +#include <assert.h>
> +
> +#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
> +#define MAX_IS_ALLOCATED_SEARCH 65536
> +
> +/*
> + * Stages:
> + *
> + * STAGE_BULK: bulk reads/writes in progress
> + * STAGE_BULK_FINISHED: bulk reads finished, bulk writes in progress
> + * STAGE_DIRTY: bulk writes finished, dirty reads/writes in progress
> + * STAGE_MIRROR_WRITES: copy finished, writes mirrored to both images.
> + * STAGE_SWITCH_FINISHED: switched to new image.
> + */
> +
> +enum BdrvCopyStage {
> +    STAGE_BULK,
> +    STAGE_BULK_FINISHED,
> +    STAGE_DIRTY,
> +    STAGE_MIRROR_WRITES,
> +    STAGE_SWITCH_FINISHED,
> +};
> +
> +typedef struct BdrvCopyState {
> +    BlockDriverState *src;
> +    BlockDriverState *dst;
> +    bool shared_base;
> +
> +    int64_t curr_sector;
> +    int64_t completed_sectors;
> +    int64_t nr_sectors;
> +
> +    enum BdrvCopyStage stage;
> +    int inflight_reads;
> +    int error;
> +    int failed;
> +    int cancelled;
> +    QLIST_HEAD(, BdrvCopyBlock) io_list;
> +    unsigned long *aio_bitmap;
> +    QEMUTimer *aio_timer;
> +    QLIST_ENTRY(BdrvCopyState) list;
> +
> +    int64_t blocks;
> +    int64_t total_time;
> +
> +    char src_device_name[32];
> +    char src_filename[1024];

A #define for the buffer size would be nice.

> +} BdrvCopyState;
> +
> +typedef struct BdrvCopyBlock {
> +    BdrvCopyState *state;
> +    uint8_t *buf;
> +    int64_t sector;
> +    int64_t nr_sectors;
> +    struct iovec iov;
> +    QEMUIOVector qiov;
> +    BlockDriverAIOCB *aiocb;
> +    int64_t time;
> +    QLIST_ENTRY(BdrvCopyBlock) list;
> +} BdrvCopyBlock;
> +
> +static QLIST_HEAD(, BdrvCopyState) block_copy_list =
> +    QLIST_HEAD_INITIALIZER(block_copy_list);
> +
> +static void alloc_aio_bitmap(BdrvCopyState *s)
> +{
> +    BlockDriverState *bs = s->src;
> +    int64_t bitmap_size;
> +
> +    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
> +            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
> +    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
> +
> +    s->aio_bitmap = qemu_mallocz(bitmap_size);
> +}
> +
> +static bool aio_inflight(BdrvCopyState *s, int64_t sector)
> +{
> +    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
> +
> +    if (s->aio_bitmap &&
> +        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(s->src)) {
> +        return !!(s->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
> +            (1UL << (chunk % (sizeof(unsigned long) * 8))));

Please use the bitmap functions in bitmap.h, also in the next function.

> +    } else {
> +        return 0;
> +    }
> +}
> +
> +static void set_aio_inflight(BdrvCopyState *s, int64_t sector_num,
> +                             int nb_sectors, int set)
> +{
> +    int64_t start, end;
> +    unsigned long val, idx, bit;
> +
> +    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
> +    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
> +
> +    for (; start <= end; start++) {
> +        idx = start / (sizeof(unsigned long) * 8);
> +        bit = start % (sizeof(unsigned long) * 8);
> +        val = s->aio_bitmap[idx];
> +        if (set) {
> +            if (!(val & (1UL << bit))) {
> +                val |= 1UL << bit;
> +            }
> +        } else {
> +            if (val & (1UL << bit)) {
> +                val &= ~(1UL << bit);
> +            }
> +        }
> +        s->aio_bitmap[idx] = val;
> +    }
> +}
> +
> +static void blkcopy_set_stage(BdrvCopyState *s, enum BdrvCopyStage stage)
> +{
> +    s->stage = stage;
> +
> +    switch (stage) {
> +    case STAGE_BULK:
> +        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_BULK);
> +        break;
> +    case STAGE_BULK_FINISHED:
> +        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_BULK_FINISHED);
> +        break;
> +    case STAGE_DIRTY:
> +        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_DIRTY);
> +        break;
> +    case STAGE_MIRROR_WRITES:
> +        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_MIRROR_WRITES);
> +        break;
> +    case STAGE_SWITCH_FINISHED:
> +        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_SWITCH_FINISHED);
> +        break;
> +    default:
> +        break;
> +    }
> +}
> +
> +static void blk_copy_handle_cb_error(BdrvCopyState *s, int ret)
> +{
> +    s->error = ret;
> +    qemu_mod_timer(s->aio_timer, qemu_get_clock_ms(rt_clock));
> +}
> +
> +static inline void add_avg_transfer_time(BdrvCopyState *s, int64_t time)
> +{
> +    s->blocks++;
> +    s->total_time += time;
> +}
> +
> +static void blk_copy_write_cb(void *opaque, int ret)
> +{
> +    BdrvCopyBlock *blk = opaque;
> +    BdrvCopyState *s = blk->state;
> +
> +    if (ret < 0) {
> +        QLIST_REMOVE(blk, list);
> +        qemu_free(blk->buf);
> +        qemu_free(blk);
> +        blk_copy_handle_cb_error(s, ret);
> +        return;
> +    }
> +
> +    QLIST_REMOVE(blk, list);
> +    add_avg_transfer_time(s, qemu_get_clock_ns(rt_clock) - blk->time);
> +
> +    /* schedule switch to STAGE_DIRTY on last bulk write completion */
> +    if (blk->state->stage == STAGE_BULK_FINISHED) {
> +        qemu_mod_timer(s->aio_timer, qemu_get_clock_ms(rt_clock));
> +    }
> +
> +    if (blk->state->stage > STAGE_BULK_FINISHED) {
> +        set_aio_inflight(blk->state, blk->sector, blk->nr_sectors, 0);
> +    }
> +
> +    qemu_free(blk->buf);
> +    qemu_free(blk);
> +}
> +
> +static void blk_copy_issue_write(BdrvCopyState *s, BdrvCopyBlock *read_blk)
> +{
> +    BdrvCopyBlock *blk = qemu_mallocz(sizeof(BdrvCopyBlock));
> +    blk->state = s;
> +    blk->sector = read_blk->sector;
> +    blk->nr_sectors = read_blk->nr_sectors;
> +    blk->time = read_blk->time;
> +    blk->buf = read_blk->buf;
> +    QLIST_INSERT_HEAD(&s->io_list, blk, list);
> +
> +    blk->iov.iov_base = read_blk->buf;
> +    blk->iov.iov_len = read_blk->iov.iov_len;
> +    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
> +
> +    BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_AIO_WRITE);
> +    blk->aiocb = bdrv_aio_writev(s->dst, blk->sector, &blk->qiov,
> +                                 blk->iov.iov_len / BDRV_SECTOR_SIZE,
> +                                 blk_copy_write_cb, blk);
> +    if (!blk->aiocb) {
> +        s->error = 1;
> +        goto error;
> +    }
> +
> +    return;
> +
> +error:
> +    QLIST_REMOVE(blk, list);
> +    qemu_free(read_blk->buf);
> +    qemu_free(blk);
> +}
> +
> +static void blk_copy_read_cb(void *opaque, int ret)
> +{
> +    BdrvCopyBlock *blk = opaque;
> +    BdrvCopyState *s = blk->state;
> +
> +    s->inflight_reads--;
> +    if (ret < 0) {
> +        QLIST_REMOVE(blk, list);
> +        qemu_free(blk->buf);
> +        qemu_free(blk);
> +        blk_copy_handle_cb_error(s, ret);
> +        return;
> +    }
> +    blk_copy_issue_write(s, blk);
> +    QLIST_REMOVE(blk, list);
> +    qemu_free(blk);
> +    qemu_mod_timer(s->aio_timer, qemu_get_clock_ms(rt_clock));
> +}
> +
> +static void blk_copy_issue_read(BdrvCopyState *s, int64_t sector,
> +                                int nr_sectors)
> +{
> +    BdrvCopyBlock *blk = qemu_mallocz(sizeof(BdrvCopyBlock));
> +    blk->buf = qemu_mallocz(BLOCK_SIZE);
> +    blk->state = s;
> +    blk->sector = sector;
> +    blk->nr_sectors = nr_sectors;
> +    QLIST_INSERT_HEAD(&s->io_list, blk, list);
> +
> +    blk->iov.iov_base = blk->buf;
> +    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
> +    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
> +
> +    s->inflight_reads++;
> +    blk->time = qemu_get_clock_ns(rt_clock);
> +    blk->aiocb = bdrv_aio_readv(s->src, sector, &blk->qiov, nr_sectors,
> +                                blk_copy_read_cb, blk);
> +    if (!blk->aiocb) {
> +        s->error = 1;
> +        goto error;
> +    }
> +
> +    return;
> +
> +error:
> +    s->inflight_reads--;
> +    QLIST_REMOVE(blk, list);
> +    qemu_free(blk->buf);
> +    qemu_free(blk);
> +}
> +
> +static bool blkcopy_can_switch(BdrvCopyState *s)
> +{
> +    int64_t remaining_dirty;
> +    int64_t avg_transfer_time;
> +
> +    remaining_dirty = bdrv_get_dirty_count(s->src);
> +    if (remaining_dirty == 0 || s->blocks == 0) {
> +        return true;
> +    }
> +
> +    avg_transfer_time = s->total_time / s->blocks;
> +    if ((remaining_dirty * avg_transfer_time) <= migrate_max_downtime()) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static int blk_issue_reads_dirty(BdrvCopyState *s)
> +{
> +    int64_t sector;
> +
> +    for (sector = s->curr_sector; sector < s->nr_sectors;) {
> +        if (bdrv_get_dirty(s->src, sector) && !aio_inflight(s, sector)) {
> +            int nr_sectors = MIN(s->nr_sectors - s->curr_sector,
> +                                 BDRV_SECTORS_PER_DIRTY_CHUNK);
> +
> +            blk_copy_issue_read(s, sector, nr_sectors);
> +            bdrv_reset_dirty(s->src, sector, nr_sectors);
> +            set_aio_inflight(s, sector, nr_sectors, 1);
> +            break;
> +        }
> +
> +        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
> +        s->curr_sector = sector;
> +    }
> +
> +    if (sector >= s->nr_sectors) {
> +        s->curr_sector = 0;
> +    }
> +    return 0;
> +}
> +
> +static int blk_issue_reads_bulk(BdrvCopyState *s)
> +{
> +    int nr_sectors;
> +    int64_t curr_sector = s->curr_sector;
> +
> +    if (s->shared_base) {
> +        while (curr_sector < s->nr_sectors &&
> +                !bdrv_is_allocated(s->src, curr_sector,
> +                                   MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
> +                curr_sector += nr_sectors;
> +        }
> +    }
> +
> +    if (curr_sector >= s->nr_sectors) {
> +        s->curr_sector = 0;
> +        return 1;
> +    }
> +
> +    curr_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
> +    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
> +
> +    blk_copy_issue_read(s, s->curr_sector, nr_sectors);
> +    s->curr_sector += nr_sectors;
> +    s->completed_sectors = curr_sector;
> +    return 0;
> +}
> +
> +static void blkcopy_finish(BdrvCopyState *s)
> +{
> +    int64_t sector;
> +    uint8_t *buf;
> +
> +    buf = qemu_malloc(BLOCK_SIZE);
> +
> +    /* FIXME: speed up loop, get_next_dirty_block? */
> +    for (sector = 0; sector < s->nr_sectors;
> +         sector += BDRV_SECTORS_PER_DIRTY_CHUNK) {
> +        if (bdrv_get_dirty(s->src, sector)) {
> +            int nr_sectors = MIN(s->nr_sectors - sector,
> +                                 BDRV_SECTORS_PER_DIRTY_CHUNK);
> +
> +            memset(buf, 0, BLOCK_SIZE);
> +            if (bdrv_read(s->src, sector, buf, nr_sectors) < 0) {
> +                goto error;
> +            }
> +            if (bdrv_write(s->dst, sector, buf, nr_sectors) < 0) {
> +                goto error;
> +            }
> +            bdrv_reset_dirty(s->src, sector, nr_sectors);
> +        }
> +
> +        if (bdrv_get_dirty_count(s->src) == 0)
> +            break;

Braces are missing here; please run checkpatch.pl.

> +    }
> +    qemu_free(buf);
> +    return;
> +
> +error:
> +    qemu_free(buf);
> +    s->error = 1;
> +}
> +
> +static void blkcopy_cleanup(BdrvCopyState *s)
> +{
> +    assert(s->inflight_reads == 0);
> +    assert(QLIST_EMPTY(&s->io_list));
> +    bdrv_set_dirty_tracking(s->src, 0);
> +    drive_put_ref(drive_get_by_blockdev(s->src));
> +    bdrv_set_in_use(s->src, 0);
> +    if (s->stage >= STAGE_DIRTY)
> +        qemu_free(s->aio_bitmap);
> +    qemu_del_timer(s->aio_timer);
> +}
> +
> +static void blkcopy_free(BdrvCopyState *s)
> +{
> +    QLIST_REMOVE(s, list);
> +    qemu_free(s);
> +}
> +
> +static void handle_error(BdrvCopyState *s)
> +{
> +    if (!QLIST_EMPTY(&s->io_list))
> +        return;
> +    s->failed = 1;
> +    blkcopy_cleanup(s);
> +}
> +
> +static void blkcopy_switch(BdrvCopyState *s)
> +{
> +    char src_filename[1024], mirror_name[2048];

#defines instead of magic constants, please.

> +    int open_flags, ret;
> +
> +    strncpy(src_filename, s->src->filename, sizeof(src_filename));

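Are the lengths checked somewhere? strncpy() does not NUL-terminate the destination when the source is at least sizeof(src_filename) bytes long.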

> +    open_flags = s->src->open_flags;
> +
> +    assert(s->stage == STAGE_DIRTY);
> +
> +    vm_stop(VMSTOP_BLOCKCOPY);
> +    /* flush any guest writes, dirty bitmap uptodate after this.
> +     * copy AIO also finished.
> +     */
> +    qemu_aio_flush();
> +    assert(QLIST_EMPTY(&s->io_list));
> +    if (s->error) {
> +        handle_error(s);
> +        goto vm_start;
> +    }
> +    blkcopy_finish(s);
> +    if (s->error) {
> +        handle_error(s);
> +        goto vm_start;
> +    }
> +    assert(bdrv_get_dirty_count(s->src) == 0);
> +    /* turn dirty bitmap off */
> +    bdrv_set_dirty_tracking(s->src, 0);
> +    /* switch to double writes */
> +    bdrv_flush_all();
> +    bdrv_close(s->src);
> +    bdrv_close(s->dst);
> +
> +    snprintf(mirror_name, sizeof(mirror_name)-1,
> +             "blkmirror:%s:%s", s->dst->filename, s->src->filename);
> +
> +    ret = bdrv_open(s->src, mirror_name, s->src->open_flags, NULL);
> +    if (ret < 0) {
> +        error_report("%s: cannot open blkmirror device, err %d",
> +                      mirror_name, ret);
> +        s->failed = 1;
> +        goto err;
> +    }
> +
> +    blkcopy_set_stage(s, STAGE_MIRROR_WRITES);
> +    qemu_del_timer(s->aio_timer);
> +
> +vm_start:
> +    vm_start();
> +    return;
> +
> +err:
> +    if (bdrv_open(s->src, src_filename, open_flags, NULL) < 0) {
> +        error_report("%s: %s: cannot fallback to source image\n", __func__,
> +                     s->src_filename);
> +        abort();
> +    }
> +    blkcopy_cleanup(s);
> +    goto vm_start;
> +}
> +
> +#define BLKCOPY_INFLIGHT 2
> +
> +/*
> + * To simplify the implementation, the IO completion callbacks do not
> + * handle stage control or submit IO for further blocks. A timer is used
> + * for such purpose.
> + */
> +
> +static void aio_timer(void *opaque)
> +{
> +    BdrvCopyState *s = opaque;
> +
> +    assert(s->cancelled == 0);
> +    assert(s->stage < STAGE_MIRROR_WRITES);
> +
> +    if (s->error) {
> +        handle_error(s);
> +        return;
> +    }
> +
> +    while (s->stage == STAGE_BULK) {
> +        if (s->inflight_reads >= BLKCOPY_INFLIGHT) {
> +            break;
> +        }
> +        if (blk_issue_reads_bulk(s)) {
> +            blkcopy_set_stage(s, STAGE_BULK_FINISHED);
> +        }
> +    }
> +
> +    if (s->stage == STAGE_BULK_FINISHED) {
> +        if (QLIST_EMPTY(&s->io_list)) {
> +            blkcopy_set_stage(s, STAGE_DIRTY);
> +            alloc_aio_bitmap(s);
> +        }
> +    }
> +
> +    while (s->stage == STAGE_DIRTY) {
> +        if (s->inflight_reads >= BLKCOPY_INFLIGHT) {
> +            break;
> +        }
> +        blk_issue_reads_dirty(s);
> +        if (blkcopy_can_switch(s)) {
> +            BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_SWITCH_START);
> +            blkcopy_switch(s);
> +            return;
> +        }
> +    }
> +}
> +
> +
> +int do_bdrv_copy_switch(Monitor *mon, const QDict *qdict, QObject **ret_data)
> +{
> +    const char *device = qdict_get_str(qdict, "device");
> +    BdrvCopyState *s = NULL;
> +    int open_flags;
> +
> +    QLIST_FOREACH(s, &block_copy_list, list) {
> +        if (!strcmp(s->src_device_name, device)) {
> +            if (s->stage != STAGE_MIRROR_WRITES) {
> +                qerror_report(QERR_IN_PROGRESS, "block copy");
> +                return -1;
> +            }
> +            break;
> +        }
> +    }
> +
> +    if (!s) {
> +        qerror_report(QERR_DEVICE_NOT_FOUND, device);
> +        return -1;
> +    }
> +
> +    open_flags = s->src->open_flags;
> +
> +    /* switch from mirrored writes to destination only */
> +    bdrv_flush_all();
> +    bdrv_close(s->src);
> +    if (bdrv_open(s->src, s->dst->filename, s->src->open_flags, NULL) < 0) {
> +        s->failed = 1;
> +        goto err;
> +    }
> +
> +    blkcopy_set_stage(s, STAGE_SWITCH_FINISHED);
> +    blkcopy_cleanup(s);
> +    return 0;
> +
> +err:
> +    if (bdrv_open(s->src, s->src_filename, open_flags, NULL) < 0) {
> +        error_report("%s: %s: cannot fallback to source image\n", __func__,
> +                     s->src_filename);

error_report() is used here, but qerror_report() is used below; please be consistent.

> +        abort();
> +    }
> +    return -1;
> +}
> +
> +static int bdrv_copy(Monitor *mon, const char * device, BlockDriverState 
> *src,
> +                     BlockDriverState *dst, bool shared_base)
> +{
> +    int64_t sectors;
> +    BdrvCopyState *blkcopy, *safe;
> +
> +    QLIST_FOREACH_SAFE(blkcopy, &block_copy_list, list, safe) {
> +        if (!strcmp(blkcopy->src_device_name, src->device_name)) {
> +            if (blkcopy->stage == STAGE_SWITCH_FINISHED || blkcopy->failed) {
> +                blkcopy_free(blkcopy);
> +            } else {
> +                qerror_report(QERR_IN_PROGRESS, "block copy");
> +                return -1;
> +            }
> +        }
> +    }
> +
> +    sectors = bdrv_getlength(src) >> BDRV_SECTOR_BITS;
> +    if (sectors != bdrv_getlength(dst) >> BDRV_SECTOR_BITS) {
> +        qerror_report(QERR_BLOCKCOPY_IMAGE_SIZE_DIFFERS);
> +        return -1;
> +    }
> +
> +    blkcopy = qemu_mallocz(sizeof(BdrvCopyState));
> +    blkcopy->src = src;
> +    blkcopy->dst = dst;
> +    blkcopy->curr_sector = 0;
> +    blkcopy->nr_sectors = sectors;
> +    blkcopy_set_stage(blkcopy, STAGE_BULK);
> +    blkcopy->aio_timer = qemu_new_timer_ms(rt_clock, aio_timer, blkcopy);
> +    blkcopy->shared_base = shared_base;
> +    strncpy(blkcopy->src_device_name, blkcopy->src->device_name,
> +            sizeof(blkcopy->src_device_name) - 1);
> +    strncpy(blkcopy->src_filename, blkcopy->src->filename,
> +            sizeof(blkcopy->src_filename) - 1);
> +
> +    bdrv_set_dirty_tracking(src, 1);
> +    qemu_mod_timer(blkcopy->aio_timer, qemu_get_clock_ms(rt_clock));
> +    drive_get_ref(drive_get_by_blockdev(src));
> +    bdrv_set_in_use(src, 1);
> +
> +    QLIST_INSERT_HEAD(&block_copy_list, blkcopy, list);
> +    return 0;
> +}
> +
> +int do_bdrv_copy(Monitor *mon, const QDict *qdict, QObject **ret_data)
> +{
> +    const char *device = qdict_get_str(qdict, "device");
> +    const char *filename = qdict_get_str(qdict, "filename");
> +    bool shared_base = qdict_get_try_bool(qdict, "incremental", 0);
> +    BlockDriverState *new_bs, *bs;
> +    int ret;
> +
> +    if (migration_active()) {
> +        qerror_report(QERR_IN_PROGRESS, "migration");
> +        return -1;
> +    }
> +
> +    bs = bdrv_find(device);
> +    if (!bs) {
> +        qerror_report(QERR_DEVICE_NOT_FOUND, device);
> +        return -1;
> +    }
> +
> +    new_bs = bdrv_new("");
> +    if (bdrv_open(new_bs, filename, bs->open_flags, NULL) < 0) {
> +        bdrv_delete(new_bs);
> +        qerror_report(QERR_OPEN_FILE_FAILED, filename);
> +        return -1;
> +    }
> +
> +    ret = bdrv_copy(mon, device, bs, new_bs, shared_base);
> +    return ret;
> +}
> +
> +int do_bdrv_copy_cancel(Monitor *mon, const QDict *qdict, QObject **ret_data)
> +{
> +    BdrvCopyState *blkcopy, *s = NULL;
> +    const char *device = qdict_get_str(qdict, "device");
> +
> +    QLIST_FOREACH(blkcopy, &block_copy_list, list) {
> +        if (!strcmp(blkcopy->src_device_name, device)) {
> +            s = blkcopy;
> +            break;
> +        }
> +    }
> +
> +    if (!s || s->stage == STAGE_SWITCH_FINISHED || s->failed) {
> +        qerror_report(QERR_DEVICE_NOT_FOUND, device);
> +        return -1;
> +    }
> +
> +    s->cancelled = 1;
> +    do {
> +        qemu_aio_flush();
> +    } while (!QLIST_EMPTY(&s->io_list));
> +    blkcopy_cleanup(s);
> +    blkcopy_free(s);
> +
> +    return 0;
> +}
> +
> +static void blockcopy_print_dict(QObject *obj, void *opaque)
> +{
> +    QDict *c_dict;
> +    Monitor *mon = opaque;
> +
> +    c_dict = qobject_to_qdict(obj);
> +
> +    monitor_printf(mon, "%s: status=%s ",
> +                        qdict_get_str(c_dict, "device"),
> +                        qdict_get_str(c_dict, "status"));
> +
> +    if (qdict_haskey(c_dict, "info")) {
> +        QDict *qdict = qobject_to_qdict(qdict_get(c_dict, "info"));
> +
> +        monitor_printf(mon, "percentage=%ld %%",
> +                       qdict_get_int(qdict, "percentage"));
> +    }
> +
> +    monitor_printf(mon, "\n");
> +}
> +
> +void do_info_blockcopy_print(Monitor *mon, const QObject *data)
> +{
> +    qlist_iter(qobject_to_qlist(data), blockcopy_print_dict, mon);
> +}
> +
> +void do_info_blockcopy(Monitor *mon, QObject **ret_data)
> +{
> +    QList *c_list;
> +    BdrvCopyState *s;
> +
> +    c_list = qlist_new();
> +
> +    QLIST_FOREACH(s, &block_copy_list, list) {
> +        QObject *c_obj;
> +        static const char *status[] = { "failed", "active", "mirrored", 
> "completed" };
> +        int i;
> +
> +        if (s->failed) {
> +            i = 0;
> +        } else if (s->stage < STAGE_MIRROR_WRITES) {
> +            i = 1;
> +        } else if (s->stage < STAGE_SWITCH_FINISHED) {
> +            i = 2;
> +        } else {
> +            i = 3;
> +        }
> +
> +        c_obj = qobject_from_jsonf("{ 'device': %s, 'status': %s }",
> +                                    s->src_device_name, status[i]);
> +
> +        if (i == 1) {
> +            QDict *dict = qobject_to_qdict(c_obj);
> +            QObject *obj;
> +
> +            /* FIXME: add dirty stage progress? */
> +            obj = qobject_from_jsonf("{ 'percentage': %" PRId64 "}",
> +                                     s->completed_sectors * 100 / 
> s->nr_sectors);
> +            qdict_put_obj(dict, "info", obj);
> +        }
> +        qlist_append_obj(c_list, c_obj);
> +    }
> +
> +    *ret_data = QOBJECT(c_list);
> +}
> +
> +bool block_copy_active(void)
> +{
> +    BdrvCopyState *s;
> +
> +    QLIST_FOREACH(s, &block_copy_list, list) {
> +        if (s->failed) {
> +            continue;
> +        }
> +        if (s->stage < STAGE_SWITCH_FINISHED) {
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
> Index: qemu-block-copy/block-copy.h
> ===================================================================
> --- /dev/null
> +++ qemu-block-copy/block-copy.h
> @@ -0,0 +1,26 @@
> +/*
> + * QEMU live block copy
> + *
> + * Copyright (C) 2010 Red Hat Inc.
> + *
> + * Authors: Marcelo Tosatti <address@hidden>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef BLOCK_COPY_H
> +#define BLOCK_COPY_H
> +
> +int do_bdrv_copy(Monitor *mon, const QDict *qdict, QObject **ret_data);
> +int do_bdrv_copy_cancel(Monitor *mon, const QDict *qdict, QObject 
> **ret_data);
> +int do_bdrv_copy_switch(Monitor *mon, const QDict *qdict, QObject 
> **ret_data);
> +
> +void do_info_blockcopy_print(Monitor *mon, const QObject *data);
> +void do_info_blockcopy(Monitor *mon, QObject **ret_data);
> +
> +bool block_copy_active(void);
> +
> +#endif /* BLOCK_COPY_H */
> +
> Index: qemu-block-copy/hmp-commands.hx
> ===================================================================
> --- qemu-block-copy.orig/hmp-commands.hx
> +++ qemu-block-copy/hmp-commands.hx
> @@ -806,6 +806,63 @@ Set maximum speed to @var{value} (in byt
>  ETEXI
>
>     {
> +        .name       = "block_copy",
> +        .args_type  = "device:s,filename:s,incremental:-i",
> +        .params     = "device filename [-i]",
> +        .help       = "live block copy device to image"
> +                      "\n\t\t\t -i for incremental copy "
> +                      "(base image shared between original and destination)",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_bdrv_copy,
> +    },
> +
> +STEXI
> +@item block_copy @var{device} @var{filename} [-i]
> +@findex block_copy
> +Live copy block device @var{device} to image @var{filename}.
> +        -i for incremental copy (base image is shared)
> +
> +Destination image @var{filename} must be created with qemu-img prior
> +to execution of this command, with image size equal to the original
> +image size.
> +
> +Incremental copy allows the destination image @var{filename} to share
> +a common base image with the original image. This option skips copying
> +blocks which are not allocated in the original image.
> +ETEXI
> +
> +    {
> +        .name       = "block_copy_cancel",
> +        .args_type  = "device:s",
> +        .params     = "device",
> +        .help       = "cancel live block copy",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_bdrv_copy_cancel,
> +    },
> +
> +STEXI
> +@item block_copy_cancel @var{device}
> +@findex block_copy_cancel
> +Cancel live block copy on @var{device}.
> +ETEXI
> +
> +    {
> +        .name       = "block_copy_switch",
> +        .args_type  = "device:s",
> +        .params     = "device",
> +        .help       = "finish live block copy",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_bdrv_copy_switch,
> +    },
> +
> +STEXI
> +@item block_copy_switch @var{device}
> +@findex block_copy_switch
> +Finish live block copy on @var{device} by switching
> +to destination image.
> +ETEXI
> +
> +    {
>         .name       = "migrate_set_downtime",
>         .args_type  = "value:T",
>         .params     = "value",
> @@ -1352,6 +1409,8 @@ show device tree
>  show qdev device model list
>  @item info roms
>  show roms
> +@item info block-copy
> +show block copy status
>  @end table
>  ETEXI
>
> Index: qemu-block-copy/monitor.c
> ===================================================================
> --- qemu-block-copy.orig/monitor.c
> +++ qemu-block-copy/monitor.c
> @@ -45,6 +45,7 @@
>  #include "balloon.h"
>  #include "qemu-timer.h"
>  #include "migration.h"
> +#include "block-copy.h"
>  #include "kvm.h"
>  #include "acl.h"
>  #include "qint.h"
> @@ -3101,6 +3102,14 @@ static const mon_cmd_t info_cmds[] = {
>     },
>  #endif
>     {
> +        .name       = "block-copy",
> +        .args_type  = "",
> +        .params     = "",
> +        .help       = "show block copy status",
> +        .user_print = do_info_blockcopy_print,
> +        .mhandler.info_new = do_info_blockcopy,
> +    },
> +    {
>         .name       = NULL,
>     },
>  };
> @@ -3242,6 +3251,14 @@ static const mon_cmd_t qmp_query_cmds[]
>         .mhandler.info_async = do_info_balloon,
>         .flags      = MONITOR_CMD_ASYNC,
>     },
> +    {
> +        .name       = "block-copy",
> +        .args_type  = "",
> +        .params     = "",
> +        .help       = "show block copy status",
> +        .user_print = do_info_blockcopy_print,
> +        .mhandler.info_new = do_info_blockcopy,
> +    },
>     { /* NULL */ },
>  };
>
> Index: qemu-block-copy/qmp-commands.hx
> ===================================================================
> --- qemu-block-copy.orig/qmp-commands.hx
> +++ qemu-block-copy/qmp-commands.hx
> @@ -581,6 +581,98 @@ Example:
>  EQMP
>
>     {
> +        .name       = "block_copy",
> +        .args_type  = "device:s,filename:s,inc:-i",
> +        .params     = "device filename [-i]",
> +        .help       = "live block copy device to image"
> +                      "\n\t\t\t -i for incremental copy "
> +                      "(base image shared between src and destination)",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_bdrv_copy,
> +    },
> +
> +SQMP
> +block-copy
> +-------
> +
> +Live block copy.
> +
> +Arguments:
> +
> +- "device": device name (json-string)
> +- "filename": target image filename (json-string)
> +- "incremental": incremental disk copy (json-bool, optional)
> +
> +Example:
> +
> +-> { "execute": "block_copy",
> +                            "arguments": { "device": "ide0-hd1",
> +                               "filename": "/mnt/new-disk.img",
> +                             } }
> +
> +<- { "return": {} }
> +
> +Notes:
> +
> +(1) The 'query-block-copy' command should be used to check block copy progress
> +    and final result (this information is provided by the 'status' member)
> +(2) Boolean argument "incremental" defaults to false
> +
> +EQMP
> +
> +    {
> +        .name       = "block_copy_cancel",
> +        .args_type  = "device:s",
> +        .params     = "device",
> +        .help       = "cancel live block copy",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_bdrv_copy_cancel,
> +    },
> +
> +SQMP
> +block_copy_cancel
> +--------------
> +
> +Cancel live block copy.
> +
> +Arguments:
> +
> +- device: device name (json-string)
> +
> +Example:
> +
> +-> { "execute": "block_copy_cancel", "arguments": { "device": "ide0-hd1" } }
> +<- { "return": {} }
> +
> +EQMP
> +
> +    {
> +        .name       = "block_copy_switch",
> +        .args_type  = "device:s",
> +        .params     = "device",
> +        .help       = "finish live block copy",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_bdrv_copy_switch,
> +    },
> +
> +SQMP
> +block_copy_switch
> +--------------
> +
> +Finish live block copy, switching device to destination image.
> +
> +Arguments:
> +
> +- device: device name (json-string)
> +
> +Example:
> +
> +-> { "execute": "block_copy_switch", "arguments": { "device": "ide0-hd1" } }
> +<- { "return": {} }
> +
> +EQMP
> +
> +    {
>         .name       = "netdev_add",
>         .args_type  = "netdev:O",
>         .params     = "[user|tap|socket],id=str[,prop=value][,...]",
> @@ -1744,6 +1836,44 @@ Examples:
>  EQMP
>
>  SQMP
> +query-block-copy
> +-------------
> +
> +Live block copy status.
> +
> +Each block copy instance information is stored in a json-object and the returned
> +value is a json-array of all instances.
> +
> +Each json-object contains the following:
> +
> +- "device": device name (json-string)
> +- "status": block copy status (json-string)
> +    - Possible values: "active", "failed", "mirrored", "completed"
> +- "info": A json-object with the statistics information, if status is "active":
> +    - "percentage": percentage completed (json-int)
> +
> +Example:
> +
> +Block copy for "ide1-hd0" active and block copy for "ide1-hd1" failed:
> +
> +-> { "execute": "query-block-copy" }
> +<- {
> +      "return":[
> +        {"device":"ide1-hd0",
> +            "status":"active",
> +            "info":{
> +               "percentage":23,
> +            }
> +        },
> +        {"device":"ide1-hd1",
> +         "status":"failed"
> +        }
> +      ]
> +   }
> +
> +EQMP
> +
> +SQMP
>  query-balloon
>  -------------
>
> Index: qemu-block-copy/Makefile.objs
> ===================================================================
> --- qemu-block-copy.orig/Makefile.objs
> +++ qemu-block-copy/Makefile.objs
> @@ -98,7 +98,7 @@ common-obj-y += buffered_file.o migratio
>  common-obj-y += qemu-char.o savevm.o #aio.o
>  common-obj-y += msmouse.o ps2.o
>  common-obj-y += qdev.o qdev-properties.o
> -common-obj-y += block-migration.o iohandler.o
> +common-obj-y += block-migration.o iohandler.o block-copy.o
>  common-obj-y += pflib.o
>  common-obj-y += bitmap.o bitops.o
>
>
>
>
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]