qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [patch 6/7] QEMU live block copy


From: Stefan Hajnoczi
Subject: Re: [Qemu-devel] [patch 6/7] QEMU live block copy
Date: Tue, 7 Jun 2011 13:15:02 +0100

On Mon, Jun 6, 2011 at 5:55 PM, Marcelo Tosatti <address@hidden> wrote:

I haven't reviewed this whole patch yet, but comments below.

This patch, like image streaming, may hit deadlocks due to synchronous
I/O emulation.  I discovered this problem when working on image
streaming and it should be solved by getting rid of the asynchronous
context concept.  The problem is that async I/O emulation will push a
new context, preventing existing requests to complete until the
current context is popped again.  If the image format has dependencies
between requests (e.g. QED allocating writes are serialized), then
this leads to deadlock because the new request cannot complete until
the old one does, but the old one needs to wait for the context to be
popped.  I think you are not affected by the QED allocating write case
since the source image is only read, not written, by live block copy.
But you might encounter this problem in other places.

> +#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)

Should this be called DIRTY_CHUNK_SIZE?

> +static void alloc_aio_bitmap(BdrvCopyState *s)
> +{
> +    BlockDriverState *bs = s->src;
> +    int64_t bitmap_size;
> +
> +    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
> +            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
> +    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
> +
> +    s->aio_bitmap = qemu_mallocz(bitmap_size);
> +}

There is bitmap.h, which has a bunch of common bitmap code that could be reused.

> +static void blk_copy_issue_read(BdrvCopyState *s, int64_t sector,
> +                                int nr_sectors)
> +{
> +    BdrvCopyBlock *blk = qemu_mallocz(sizeof(BdrvCopyBlock));
> +    blk->buf = qemu_mallocz(BLOCK_SIZE);

qemu_blockalign() should be used to meet block device alignment requirements.

> +    blk->state = s;
> +    blk->sector = sector;
> +    blk->nr_sectors = nr_sectors;
> +    QLIST_INSERT_HEAD(&s->io_list, blk, list);
> +
> +    blk->iov.iov_base = blk->buf;
> +    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
> +    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
> +
> +    s->inflight_reads++;
> +    blk->time = qemu_get_clock_ns(rt_clock);
> +    blk->aiocb = bdrv_aio_readv(s->src, sector, &blk->qiov, nr_sectors,
> +                                blk_copy_read_cb, blk);
> +    if (!blk->aiocb) {
> +        s->error = 1;
> +        goto error;
> +    }
> +
> +    return;
> +
> +error:
> +    s->inflight_reads--;
> +    QLIST_REMOVE(blk, list);
> +    qemu_free(blk->buf);

qemu_vfree() after you switch to qemu_blockalign() above.

> +    qemu_free(blk);
> +}

> +static void blkcopy_cleanup(BdrvCopyState *s)
> +{

Need to free dst, at least on error?

> +    assert(s->inflight_reads == 0);
> +    assert(QLIST_EMPTY(&s->io_list));
> +    bdrv_set_dirty_tracking(s->src, 0);
> +    drive_put_ref(drive_get_by_blockdev(s->src));
> +    bdrv_set_in_use(s->src, 0);
> +    if (s->stage >= STAGE_DIRTY) {
> +        qemu_free(s->aio_bitmap);
> +    }
> +    qemu_del_timer(s->aio_timer);

Missing qemu_free_timer().

> +}

> +static int bdrv_copy_setup(const char *device, const char *filename,
> +                           bool shared_base)
> +{
> +    int64_t sectors;
> +    BdrvCopyState *blkcopy, *safe;
> +    BlockDriverState *src, *dst;
> +
> +    src = bdrv_find(device);
> +    if (src) {
> +        qerror_report(QERR_DEVICE_NOT_FOUND, device);
> +        return -1;
> +    }
> +
> +    dst = bdrv_new("");
> +    if (bdrv_open(dst, filename, src->open_flags, NULL) < 0) {
> +        bdrv_delete(dst);
> +        qerror_report(QERR_OPEN_FILE_FAILED, filename);
> +        return -1;
> +    }
> +
> +    QLIST_FOREACH_SAFE(blkcopy, &block_copy_list, list, safe) {
> +        if (!strcmp(blkcopy->device_name, src->device_name)) {
> +            if (blkcopy->stage == STAGE_SWITCH_FINISHED || blkcopy->failed) {
> +                blkcopy_free(blkcopy);
> +            } else {
> +                qerror_report(QERR_IN_PROGRESS, "block copy");

dst is leaked.

> +                return -1;
> +            }
> +        }
> +    }
> +
> +    sectors = bdrv_getlength(src) >> BDRV_SECTOR_BITS;
> +    if (sectors != bdrv_getlength(dst) >> BDRV_SECTOR_BITS) {
> +        qerror_report(QERR_BLOCKCOPY_IMAGE_SIZE_DIFFERS);
> +        return -1;

dst is leaked.

Stefan



reply via email to

[Prev in Thread] Current Thread [Next in Thread]