[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH RFC 1/1] Stream block job involves copy-on-read
From: |
Andrey Shinkevich |
Subject: |
Re: [Qemu-devel] [PATCH RFC 1/1] Stream block job involves copy-on-read filter |
Date: |
Thu, 31 Jan 2019 14:02:14 +0000 |
PINGing
Please help!
On 23/01/2019 14:54, Andrey Shinkevich wrote:
> The copy-on-read filter driver is applied to block-stream operations.
> The 'test_stream_parallel' in the file tests/qemu-iotests/030 runs
> jobs that use nodes for streaming in parallel through the backing chain.
> We've got filters being inserted into and removed from the backing chain
> while jobs are running. As a result, a filter node may be passed as the
> 'base' parameter to the stream_start() function when the base node name
> is not specified (the base node is identified by its file name which is
> the same as that of the related filter node).
> Another issue is that a function keeps the pointer to the filter BDS
> object that can be replaced and deleted after the coroutine switch.
> For example, the function backing_bs() returns the pointer to the
> backing BDS and the BDS reference counter is not incremented.
> A solution (or workaround) made with the given patch for block-stream
> job helps to pass all the iotests in the file tests/qemu-iotests/030.
> Any piece of advice on how to amend the solution will be appreciated.
> I am looking forward to hearing from you.
>
> Signed-off-by: Andrey Shinkevich <address@hidden>
> ---
> block/stream.c | 154
> ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 143 insertions(+), 11 deletions(-)
>
> diff --git a/block/stream.c b/block/stream.c
> index 7a49ac0..18e51b3 100644
> --- a/block/stream.c
> +++ b/block/stream.c
> @@ -16,6 +16,7 @@
> #include "block/block_int.h"
> #include "block/blockjob_int.h"
> #include "qapi/error.h"
> +#include "qapi/qmp/qdict.h"
> #include "qapi/qmp/qerror.h"
> #include "qemu/ratelimit.h"
> #include "sysemu/block-backend.h"
> @@ -35,8 +36,26 @@ typedef struct StreamBlockJob {
> BlockdevOnError on_error;
> char *backing_file_str;
> bool bs_read_only;
> + BlockDriverState *cor_filter_bs;
> + BlockDriverState *target_bs;
> } StreamBlockJob;
>
> +static BlockDriverState *child_file_bs(BlockDriverState *bs)
> +{
> + return bs->file ? bs->file->bs : NULL;
> +}
> +
> +static BlockDriverState *skip_filter(BlockDriverState *bs)
> +{
> + BlockDriverState *ret_bs = bs;
> +
> + while (ret_bs && ret_bs->drv && ret_bs->drv->is_filter) {
> + ret_bs = child_file_bs(ret_bs);
> + }
> +
> + return ret_bs;
> +}
> +
> static int coroutine_fn stream_populate(BlockBackend *blk,
> int64_t offset, uint64_t bytes,
> void *buf)
> @@ -51,14 +70,12 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
> qemu_iovec_init_external(&qiov, &iov, 1);
>
> /* Copy-on-read the unallocated clusters */
> - return blk_co_preadv(blk, offset, qiov.size, &qiov,
> BDRV_REQ_COPY_ON_READ);
> + return blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
> }
>
> -static int stream_prepare(Job *job)
> +static int stream_change_backing_file(StreamBlockJob *s)
> {
> - StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> - BlockJob *bjob = &s->common;
> - BlockDriverState *bs = blk_bs(bjob->blk);
> + BlockDriverState *bs = s->target_bs;
> BlockDriverState *base = s->base;
> Error *local_err = NULL;
> int ret = 0;
> @@ -82,11 +99,53 @@ static int stream_prepare(Job *job)
> return ret;
> }
>
> +static int stream_exit(Job *job, bool abort)
> +{
> + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> + BlockJob *bjob = &s->common;
> + BlockDriverState *target_bs = s->target_bs;
> + int ret = 0;
> +
> + /* Retain the BDS until we complete the graph change. */
> + bdrv_ref(target_bs);
> + /* Hold a guest back from writing while permissions are being reset. */
> + bdrv_drained_begin(target_bs);
> + /* Drop permissions before the graph change. */
> + bdrv_child_try_set_perm(s->cor_filter_bs->file, 0, BLK_PERM_ALL,
> + &error_abort);
> + if (!abort) {
> + ret = stream_change_backing_file(s);
> + }
> +
> + bdrv_replace_node(s->cor_filter_bs, target_bs, &error_abort);
> + /* Switch the BB back to the filter so that job terminated properly. */
> + blk_remove_bs(bjob->blk);
> + blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
> + blk_insert_bs(bjob->blk, s->cor_filter_bs, &error_abort);
> +
> + bdrv_drained_end(target_bs);
> + bdrv_unref(target_bs);
> + /* Submit control over filter to the job instance. */
> + bdrv_unref(s->cor_filter_bs);
> +
> + return ret;
> +}
> +
> +static int stream_prepare(Job *job)
> +{
> + return stream_exit(job, false);
> +}
> +
> +static void stream_abort(Job *job)
> +{
> + stream_exit(job, job->ret < 0);
> +}
> +
> static void stream_clean(Job *job)
> {
> StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> BlockJob *bjob = &s->common;
> - BlockDriverState *bs = blk_bs(bjob->blk);
> + BlockDriverState *bs = s->target_bs;
>
> /* Reopen the image back in read-only mode if necessary */
> if (s->bs_read_only) {
> @@ -102,7 +161,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
> {
> StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> BlockBackend *blk = s->common.blk;
> - BlockDriverState *bs = blk_bs(blk);
> + BlockDriverState *bs = s->target_bs;
> BlockDriverState *base = s->base;
> int64_t len;
> int64_t offset = 0;
> @@ -213,12 +272,64 @@ static const BlockJobDriver stream_job_driver = {
> .free = block_job_free,
> .run = stream_run,
> .prepare = stream_prepare,
> + .abort = stream_abort,
> .clean = stream_clean,
> .user_resume = block_job_user_resume,
> .drain = block_job_drain,
> },
> };
>
> +static BlockDriverState *create_filter_node(BlockDriverState *bs,
> + Error **errp)
> +{
> + QDict *opts = qdict_new();
> +
> + qdict_put_str(opts, "driver", "copy-on-read");
> + qdict_put_str(opts, "file", bdrv_get_node_name(bs));
> +
> + return bdrv_open(NULL, NULL, opts, BDRV_O_RDWR, errp);
> +}
> +
> +static void remove_filter(BlockDriverState *cor_filter_bs)
> +{
> + BlockDriverState *bs = child_file_bs(cor_filter_bs);
> +
> + /* Hold a guest back from writing until we remove the filter */
> + bdrv_drained_begin(bs);
> + bdrv_child_try_set_perm(cor_filter_bs->file, 0, BLK_PERM_ALL,
> + &error_abort);
> + bdrv_replace_node(cor_filter_bs, bs, &error_abort);
> + bdrv_drained_end(bs);
> +
> + bdrv_unref(cor_filter_bs);
> +}
> +
> +static BlockDriverState *insert_filter(BlockDriverState *bs, Error **errp)
> +{
> + BlockDriverState *cor_filter_bs;
> + Error *local_err = NULL;
> +
> + cor_filter_bs = create_filter_node(bs, errp);
> + if (cor_filter_bs == NULL) {
> + error_prepend(errp, "Could not create filter node: ");
> + return NULL;
> + }
> +
> + bdrv_set_aio_context(cor_filter_bs, bdrv_get_aio_context(bs));
> +
> + bdrv_drained_begin(bs);
> + bdrv_replace_node(bs, cor_filter_bs, &local_err);
> + bdrv_drained_end(bs);
> +
> + if (local_err) {
> + bdrv_unref(cor_filter_bs);
> + error_propagate(errp, local_err);
> + return NULL;
> + }
> +
> + return cor_filter_bs;
> +}
> +
> void stream_start(const char *job_id, BlockDriverState *bs,
> BlockDriverState *base, const char *backing_file_str,
> int creation_flags, int64_t speed,
> @@ -227,6 +338,14 @@ void stream_start(const char *job_id, BlockDriverState
> *bs,
> StreamBlockJob *s;
> BlockDriverState *iter;
> bool bs_read_only;
> + BlockDriverState *cor_filter_bs;
> +
> + /*
> + * The base node might be identified by its file name rather than
> + * by its node name. In that case, we can encounter a filter node
> + * instead which has other BS pointer and the same file name.
> + */
> + base = skip_filter(base);
>
> /* Make sure that the image is opened in read-write mode */
> bs_read_only = bdrv_is_read_only(bs);
> @@ -236,10 +355,15 @@ void stream_start(const char *job_id, BlockDriverState
> *bs,
> }
> }
>
> + cor_filter_bs = insert_filter(bs, errp);
> + if (cor_filter_bs == NULL) {
> + goto fail;
> + }
> +
> /* Prevent concurrent jobs trying to modify the graph structure here, we
> * already have our own plans. Also don't allow resize as the image
> size is
> * queried only at the job start and then cached. */
> - s = block_job_create(job_id, &stream_job_driver, NULL, bs,
> + s = block_job_create(job_id, &stream_job_driver, NULL, cor_filter_bs,
> BLK_PERM_CONSISTENT_READ |
> BLK_PERM_WRITE_UNCHANGED |
> BLK_PERM_GRAPH_MOD,
> BLK_PERM_CONSISTENT_READ |
> BLK_PERM_WRITE_UNCHANGED |
> @@ -249,16 +373,21 @@ void stream_start(const char *job_id, BlockDriverState
> *bs,
> goto fail;
> }
>
> - /* Block all intermediate nodes between bs and base, because they will
> + /*
> + * Block all intermediate nodes between bs and base, because they will
> * disappear from the chain after this operation. The streaming job
> reads
> * every block only once, assuming that it doesn't change, so block
> writes
> - * and resizes. */
> - for (iter = backing_bs(bs); iter && iter != base; iter =
> backing_bs(iter)) {
> + * and resizes. We account a filter node may be a part of the graph.
> + */
> + for (iter = skip_filter(backing_bs(bs)); iter && iter != base;
> + iter = skip_filter(backing_bs(iter))) {
> block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
> BLK_PERM_CONSISTENT_READ |
> BLK_PERM_WRITE_UNCHANGED,
> &error_abort);
> }
>
> + s->cor_filter_bs = cor_filter_bs;
> + s->target_bs = bs;
> s->base = base;
> s->backing_file_str = g_strdup(backing_file_str);
> s->bs_read_only = bs_read_only;
> @@ -269,6 +398,9 @@ void stream_start(const char *job_id, BlockDriverState
> *bs,
> return;
>
> fail:
> + if (cor_filter_bs) {
> + remove_filter(cor_filter_bs);
> + }
> if (bs_read_only) {
> bdrv_reopen_set_read_only(bs, true, NULL);
> }
>
--
With the best regards,
Andrey Shinkevich