[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v2] mirror: follow AioContext change gracefully
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH v2] mirror: follow AioContext change gracefully |
Date: |
Mon, 13 Jun 2016 11:55:30 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.1.0 |
On 12/06/2016 08:51, Fam Zheng wrote:
> From: Stefan Hajnoczi <address@hidden>
>
> When dataplane is enabled or disabled the drive switches to a new
> AioContext. The mirror block job must also move to the new AioContext
> so that drive accesses are always made within its AioContext.
>
> This patch partially achieves that by draining target and source
> requests to reach a quiescent point. The job is resumed in the new
> AioContext after moving s->target into the new AioContext.
>
> The quiesce_requested flag is added to deal with yield points in
> block_job_sleep_ns(), bdrv_is_allocated_above(), and
> bdrv_get_block_status_above(). Previously they continued executing in
> the old AioContext. The nested aio_poll in mirror_detach_aio_context
> will drive the mirror coroutine up to fixed yield points, where
> mirror_check_for_quiesce is called.
>
> Cc: Fam Zheng <address@hidden>
> Cc: Paolo Bonzini <address@hidden>
> Cc: Jeff Cody <address@hidden>
> Signed-off-by: Stefan Hajnoczi <address@hidden>
> [Drain source as well, and add s->quiesce_requested flag. -- Fam]
> Signed-off-by: Fam Zheng <address@hidden>
As discussed on IRC, perhaps we can reuse the pausing/job->busy
mechanism to detect quiescence.
There's no synchronous pause function, but it can be realized with
block_job_pause and aio_poll.
Also, while discussing this patch on IRC, Fam noticed that resume
currently clears the job's iostatus. I think that functionality can be
moved to the QMP command.
Thanks,
Paolo
> ---
>
> v2: Picked up Stefan's RFC patch and move on towards a more complete
> fix. Please review!
>
> Jason: it would be nice if you could test this version again. It differs
> from the previous version.
> ---
> block/mirror.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 44 insertions(+), 1 deletion(-)
>
> diff --git a/block/mirror.c b/block/mirror.c
> index 80fd3c7..142199a 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -63,6 +63,8 @@ typedef struct MirrorBlockJob {
> int ret;
> bool unmap;
> bool waiting_for_io;
> + bool quiesce_requested; /* temporarily detached to move AioContext,
> + don't do more I/O */
> int target_cluster_sectors;
> int max_iov;
> } MirrorBlockJob;
> @@ -119,7 +121,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
> qemu_iovec_destroy(&op->qiov);
> g_free(op);
>
> - if (s->waiting_for_io) {
> + if (s->waiting_for_io && !s->quiesce_requested) {
> qemu_coroutine_enter(s->common.co, NULL);
> }
> }
> @@ -307,6 +309,14 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
> }
> }
>
> +static void coroutine_fn mirror_check_for_quiesce(MirrorBlockJob *s)
> +{
> + if (s->quiesce_requested) {
> + s->quiesce_requested = false;
> + qemu_coroutine_yield();
> + }
> +}
> +
> static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
> {
> BlockDriverState *source = blk_bs(s->common.blk);
> @@ -331,6 +341,7 @@ static uint64_t coroutine_fn
> mirror_iteration(MirrorBlockJob *s)
> mirror_wait_for_io(s);
> }
>
> + mirror_check_for_quiesce(s);
> /* Find the number of consective dirty chunks following the first dirty
> * one, and wait for in flight requests in them. */
> while (nb_chunks * sectors_per_chunk < (s->buf_size >>
> BDRV_SECTOR_BITS)) {
> @@ -442,6 +453,31 @@ static void mirror_drain(MirrorBlockJob *s)
> }
> }
>
> +static void mirror_attached_aio_context(AioContext *new_context, void
> *opaque)
> +{
> + MirrorBlockJob *s = opaque;
> +
> + blk_set_aio_context(s->target, new_context);
> +
> + /* Resume execution */
> + assert(!s->quiesce_requested);
> + if (s->waiting_for_io) {
> + qemu_coroutine_enter(s->common.co, NULL);
> + }
> +}
> +
> +static void mirror_detach_aio_context(void *opaque)
> +{
> + MirrorBlockJob *s = opaque;
> +
> + /* Complete pending write requests */
> + assert(!s->quiesce_requested);
> + s->quiesce_requested = true;
> + while (s->quiesce_requested || s->in_flight) {
> + aio_poll(blk_get_aio_context(s->common.blk), true);
> + }
> +}
> +
> typedef struct {
> int ret;
> } MirrorExitData;
> @@ -491,6 +527,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
> if (replace_aio_context) {
> aio_context_release(replace_aio_context);
> }
> + blk_remove_aio_context_notifier(s->common.blk,
> mirror_attached_aio_context,
> + mirror_detach_aio_context, s);
> g_free(s->replaces);
> bdrv_op_unblock_all(target_bs, s->common.blocker);
> blk_unref(s->target);
> @@ -583,6 +621,7 @@ static void coroutine_fn mirror_run(void *opaque)
> block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
> }
>
> + mirror_check_for_quiesce(s);
> if (block_job_is_cancelled(&s->common)) {
> goto immediate_exit;
> }
> @@ -612,6 +651,7 @@ static void coroutine_fn mirror_run(void *opaque)
> goto immediate_exit;
> }
>
> + mirror_check_for_quiesce(s);
> cnt = bdrv_get_dirty_count(s->dirty_bitmap);
> /* s->common.offset contains the number of bytes already processed so
> * far, cnt is the number of dirty sectors remaining and
> @@ -851,6 +891,9 @@ static void mirror_start_job(BlockDriverState *bs,
> BlockDriverState *target,
>
> bdrv_op_block_all(target, s->common.blocker);
>
> + blk_add_aio_context_notifier(s->common.blk, mirror_attached_aio_context,
> + mirror_detach_aio_context, s);
> +
> s->common.co = qemu_coroutine_create(mirror_run);
> trace_mirror_start(bs, s, s->common.co, opaque);
> qemu_coroutine_enter(s->common.co, s);
>