qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] block/replication.c: Properly attach children


From: Vladimir Sementsov-Ogievskiy
Subject: Re: [PATCH] block/replication.c: Properly attach children
Date: Wed, 7 Jul 2021 16:01:31 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.11.0

06.07.2021 19:11, Lukas Straub wrote:
The replication driver needs access to the children block-nodes of
it's child so it can issue bdrv_make_empty to manage the replication.
However, it does this by directly copying the BdrvChilds, which is
wrong.

Fix this by properly attaching the block-nodes with
bdrv_attach_child().

Also, remove a workaround introduced in commit
6ecbc6c52672db5c13805735ca02784879ce8285
"replication: Avoid blk_make_empty() on read-only child".

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---

-v2: Test for BDRV_CHILD_PRIMARY in replication_child_perm, since
      bs->file might not be set yet. (Vladimir)

  block/replication.c | 94 +++++++++++++++++++++++++++++----------------
  1 file changed, 61 insertions(+), 33 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 52163f2d1f..fd8cb728a3 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -166,7 +166,12 @@ static void replication_child_perm(BlockDriverState *bs, 
BdrvChild *c,
                                     uint64_t perm, uint64_t shared,
                                     uint64_t *nperm, uint64_t *nshared)
  {
-    *nperm = BLK_PERM_CONSISTENT_READ;
+    if (role & BDRV_CHILD_PRIMARY) {
+        *nperm = BLK_PERM_CONSISTENT_READ;
+    } else {
+        *nperm = 0;
+    }

Why you drop READ access for other children? You don't mention it in 
commit-msg..

Upd: ok now I see that we are not going to read from hidden_disk child, and that's the 
only "other" child that worth to mention.
Still, we should be sure that hidden_disk child gets WRITE permission in case 
we are going to call bdrv_make_empty on it.

+
      if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
          *nperm |= BLK_PERM_WRITE;
      }
@@ -340,17 +345,7 @@ static void secondary_do_checkpoint(BDRVReplicationState 
*s, Error **errp)
          return;
      }
- BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
-                                BLK_PERM_WRITE, BLK_PERM_ALL);
-    blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        blk_unref(blk);
-        return;
-    }
-
-    ret = blk_make_empty(blk, errp);
-    blk_unref(blk);
+    ret = bdrv_make_empty(s->hidden_disk, errp);

So, here you rely on BLK_PERM_WRITE being set in replication_child_perm().. 
Probably that's OK, however logic is changed. Shouldn't we always require write 
permission in replication_child_perm() for hidden_disk ?

      if (ret < 0) {
          return;
      }
@@ -365,27 +360,35 @@ static void reopen_backing_file(BlockDriverState *bs, 
bool writable,
                                  Error **errp)
  {
      BDRVReplicationState *s = bs->opaque;
+    BdrvChild *hidden_disk, *secondary_disk;
      BlockReopenQueue *reopen_queue = NULL;
+ /*
+     * s->hidden_disk and s->secondary_disk may not be set yet, as they will
+     * only be set after the children are writable.
+     */
+    hidden_disk = bs->file->bs->backing;
+    secondary_disk = hidden_disk->bs->backing;
+
      if (writable) {
-        s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs);
-        s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs);
+        s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs);
+        s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
      }
- bdrv_subtree_drained_begin(s->hidden_disk->bs);
-    bdrv_subtree_drained_begin(s->secondary_disk->bs);
+    bdrv_subtree_drained_begin(hidden_disk->bs);
+    bdrv_subtree_drained_begin(secondary_disk->bs);

That kind of staff may be done as a separate preparation patch, with 
no-logic-change refactoring, this makes the main logic-change patch simpler.

if (s->orig_hidden_read_only) {
          QDict *opts = qdict_new();
          qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
-        reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs,
+        reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs,
                                           opts, true);
      }
if (s->orig_secondary_read_only) {
          QDict *opts = qdict_new();
          qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
-        reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs,
+        reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs,
                                           opts, true);
      }, probably it could be a separate patch if it is needed.
@@ -393,8 +396,8 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
          bdrv_reopen_multiple(reopen_queue, errp);
      }
- bdrv_subtree_drained_end(s->hidden_disk->bs);
-    bdrv_subtree_drained_end(s->secondary_disk->bs);
+    bdrv_subtree_drained_end(hidden_disk->bs);
+    bdrv_subtree_drained_end(secondary_disk->bs);
  }
static void backup_job_cleanup(BlockDriverState *bs)
@@ -451,6 +454,7 @@ static void replication_start(ReplicationState *rs, 
ReplicationMode mode,
      BlockDriverState *bs = rs->opaque;
      BDRVReplicationState *s;
      BlockDriverState *top_bs;
+    BdrvChild *active_disk, *hidden_disk, *secondary_disk;
      int64_t active_length, hidden_length, disk_length;
      AioContext *aio_context;
      Error *local_err = NULL;
@@ -488,32 +492,32 @@ static void replication_start(ReplicationState *rs, 
ReplicationMode mode,
      case REPLICATION_MODE_PRIMARY:
          break;
      case REPLICATION_MODE_SECONDARY:
-        s->active_disk = bs->file;
-        if (!s->active_disk || !s->active_disk->bs ||
-                                    !s->active_disk->bs->backing) {
+        active_disk = bs->file;
+        if (!active_disk || !active_disk->bs ||
+                                    !active_disk->bs->backing) {
              error_setg(errp, "Active disk doesn't have backing file");
              aio_context_release(aio_context);
              return;
          }
- s->hidden_disk = s->active_disk->bs->backing;
-        if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
+        hidden_disk = active_disk->bs->backing;
+        if (!hidden_disk->bs || !hidden_disk->bs->backing) {
              error_setg(errp, "Hidden disk doesn't have backing file");
              aio_context_release(aio_context);
              return;
          }
- s->secondary_disk = s->hidden_disk->bs->backing;
-        if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) {
+        secondary_disk = hidden_disk->bs->backing;
+        if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) {
              error_setg(errp, "The secondary disk doesn't have block backend");
              aio_context_release(aio_context);
              return;
          }
  , probably it could be a separate patch if it is needed.
          /* verify the length */
-        active_length = bdrv_getlength(s->active_disk->bs);
-        hidden_length = bdrv_getlength(s->hidden_disk->bs);
-        disk_length = bdrv_getlength(s->secondary_disk->bs);
+        active_length = bdrv_getlength(active_disk->bs);
+        hidden_length = bdrv_getlength(hidden_disk->bs);
+        disk_length = bdrv_getlength(secondary_disk->bs);
          if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
              active_length != hidden_length || hidden_length != disk_length) {
              error_setg(errp, "Active disk, hidden disk, secondary disk's 
length"
@@ -523,10 +527,10 @@ static void replication_start(ReplicationState *rs, 
ReplicationMode mode,
          }
/* Must be true, or the bdrv_getlength() calls would have failed */
-        assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
+        assert(active_disk->bs->drv && hidden_disk->bs->drv);
- if (!s->active_disk->bs->drv->bdrv_make_empty ||
-            !s->hidden_disk->bs->drv->bdrv_make_empty) {
+        if (!active_disk->bs->drv->bdrv_make_empty ||
+            !hidden_disk->bs->drv->bdrv_make_empty) {
              error_setg(errp,
                         "Active disk or hidden disk doesn't support 
make_empty");
              aio_context_release(aio_context);
@@ -541,6 +545,28 @@ static void replication_start(ReplicationState *rs, 
ReplicationMode mode,
              return;
          }
+ s->active_disk = active_disk;
+
+        bdrv_ref(hidden_disk->bs);
+        s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
+                                           &child_of_bds, BDRV_CHILD_DATA,
+                                           &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            aio_context_release(aio_context);
+            return;
+        }

Ok, the point of creating hidden_disk is to call bdrv_make_empty on it.

+
+        bdrv_ref(secondary_disk->bs);
+        s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs,
+                                              "secondary disk", &child_of_bds,
+                                              BDRV_CHILD_DATA, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            aio_context_release(aio_context);
+            return;
+        }

But s->secondary_disk child is actually unused.. No reason to create it.

+
          /* start backup job now */
          error_setg(&s->blocker,
                     "Block device is in use by internal backup job");
@@ -646,7 +672,9 @@ static void replication_done(void *opaque, int ret)
          s->stage = BLOCK_REPLICATION_DONE;
s->active_disk = NULL;
+        bdrv_unref_child(bs, s->secondary_disk);
          s->secondary_disk = NULL;
+        bdrv_unref_child(bs, s->hidden_disk);
          s->hidden_disk = NULL;
          s->error = 0;
      } else {


For me it looks like the good way to update is:

1. drop s->active_disk. it seems to be just a copy of bs->file, better to use 
bs->file directly, like other drivers do.
2. reduce usage of s->hidden_disk and s->secondary_disk, like you do in this 
patch, using local variables instead. Also probably just drop s->secondary_disk..
3. introduce a child, to be used with bdrv_make_empty(s->hidden_disk)

And these are 3 separate patches. 1 and 2 may be merged, or instead 2 may be 
divided into two (to refactor secondary_disk and hidden_disk separately)..

Still, I'm not a maintainer of replication, neither I have good understanding 
of how it works, so don't take my advises to heart :)

--
Best regards,
Vladimir



reply via email to

[Prev in Thread] Current Thread [Next in Thread]