[Top][All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-block] [Qemu-devel] [PATCH 07/11] block: drive_backup transact

From: Max Reitz
Subject: Re: [Qemu-block] [Qemu-devel] [PATCH 07/11] block: drive_backup transaction callback support
Date: Wed, 18 Mar 2015 09:41:43 -0400
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.5.0

On 2015-03-17 at 19:27, John Snow wrote:

On 03/17/2015 03:49 PM, Max Reitz wrote:
On 2015-03-04 at 23:15, John Snow wrote:
This patch actually implements the transactional callback system
for the drive_backup transaction.

(1) We manually pick up a reference to the bitmap if present to allow
its cleanup to be delayed until after all drive_backup jobs launched
     by the transaction have fully completed.

(2) We create a functional closure that envelops the original
     callback, to be able to intercept the completion status and
return code
     for the job.

(3) We add the drive_backup_cb method for the drive_backup action, which
     unpacks the completion information and invokes the final cleanup.

(4) backup_transaction_complete will perform the final cleanup on the
     backup job.

(5) In the case of transaction cancellation, drive_backup_cb is still
     responsible for cleaning up the mess we may have already made.

Signed-off-by: John Snow <address@hidden>
  block/backup.c            |  9 +++++++
  blockdev.c                | 64
  include/block/block_int.h |  8 ++++++
  3 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 4332df4..3673fb0 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -233,6 +233,15 @@ typedef struct {
      int ret;
  } BackupCompleteData;
+void backup_transaction_complete(BlockJob *job, int ret)
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+    if (s->sync_bitmap) {
+        bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, NULL);
+    }
  static void backup_complete(BlockJob *job, void *opaque)
      BackupBlockJob *s = container_of(job, BackupBlockJob, common);
diff --git a/blockdev.c b/blockdev.c
index 9e3b9e9..3ff14a7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1363,14 +1363,6 @@ static void transaction_callback(void *opaque,
int ret)
  typedef void (CallbackFn)(void *opaque, int ret);
-/* Temporary. Removed in the next patch. */
-CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
-                                    void *opaque,
-                                    void (*callback)(void *, int),
-                                    void **new_opaque);
-void undo_transaction_wrapper(BlkTransactionState *common);
   * Create a new transactional callback wrapper.
@@ -1389,7 +1381,7 @@ void
undo_transaction_wrapper(BlkTransactionState *common);
   * @return The callback to be used instead of @callback.
-CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
+static CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
                                             void *opaque,
                                             CallbackFn *callback,
                                             void **new_opaque)
@@ -1416,7 +1408,7 @@ CallbackFn
*new_transaction_wrapper(BlkTransactionState *common,
   * Undo any actions performed by the above call.
-void undo_transaction_wrapper(BlkTransactionState *common)
+static void undo_transaction_wrapper(BlkTransactionState *common)
      BlkTransactionList *btl = common->list;
      BlkTransactionState *bts;
@@ -1449,6 +1441,7 @@ void
undo_transaction_wrapper(BlkTransactionState *common)
+static void block_job_cb(void *opaque, int ret);
  static void _drive_backup(const char *device, const char *target,
                            bool has_format, const char *format,
                            enum MirrorSyncMode sync,
@@ -1767,6 +1760,9 @@ static void
drive_backup_prepare(BlkTransactionState *common, Error **errp)
      BlockDriverState *bs;
      DriveBackup *backup;
      Error *local_err = NULL;
+    CallbackFn *cb;
+    void *opaque;
+    BdrvDirtyBitmap *bmap = NULL;
      assert(common->action->kind ==
      backup = common->action->drive_backup;
@@ -1777,6 +1773,19 @@ static void
drive_backup_prepare(BlkTransactionState *common, Error **errp)
+    /* BackupBlockJob is opaque to us, so look up the bitmap
ourselves */
+    if (backup->has_bitmap) {
+        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
+        if (!bmap) {
+            error_setg(errp, "Bitmap '%s' could not be found",
+            return;
+        }
+    }
+    /* Create our transactional callback wrapper,
+       and register that we'd like to call .cb() later. */
+    cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque);
      /* AioContext is released in .clean() */
      state->aio_context = bdrv_get_aio_context(bs);
@@ -1789,7 +1798,7 @@ static void
drive_backup_prepare(BlkTransactionState *common, Error **errp)
                    backup->has_bitmap, backup->bitmap,
backup->has_on_source_error, backup->on_source_error, backup->has_on_target_error, backup->on_target_error,
-                  NULL, NULL,
+                  cb, opaque,
      if (local_err) {
          error_propagate(errp, local_err);
@@ -1798,6 +1807,11 @@ static void
drive_backup_prepare(BlkTransactionState *common, Error **errp)
      state->bs = bs;
      state->job = state->bs->job;
+    /* Keep the job alive until .cb(), too. */
+    block_job_incref(state->job);
+    if (bmap) {
+        bdrv_dirty_bitmap_incref(bmap);
+    }
  static void drive_backup_abort(BlkTransactionState *common)
@@ -1809,6 +1823,10 @@ static void
drive_backup_abort(BlkTransactionState *common)
      if (bs && bs->job && bs->job == state->job) {
+    /* Undo any callback actions we may have done. Putting down
+     * obtained during prepare() is handled by cb(). */
+    undo_transaction_wrapper(common);
  static void drive_backup_clean(BlkTransactionState *common)
@@ -1820,6 +1838,27 @@ static void
drive_backup_clean(BlkTransactionState *common)
+static void drive_backup_cb(BlkTransactionState *common)
+    BlkTransactionData *btd = common->opaque;
+    BlockDriverState *bs = btd->opaque;
+    DriveBackupState *state = DO_UPCAST(DriveBackupState, common,
+    assert(state->bs == bs);
+    if (bs->job) {
+        assert(state->job == bs->job);
+    }
+    state->aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(state->aio_context);
+    /* Note: We also have the individual job's return code in
btd->ret */
+    backup_transaction_complete(state->job, common->list->status);
+    block_job_decref(state->job);
+    aio_context_release(state->aio_context);
  typedef struct BlockdevBackupState {
      BlkTransactionState common;
      BlockDriverState *bs;
@@ -2004,7 +2043,8 @@ static const BdrvActionOps actions[] = {
          .instance_size = sizeof(DriveBackupState),
          .prepare = drive_backup_prepare,
          .abort = drive_backup_abort,
-        .clean = drive_backup_clean
+        .clean = drive_backup_clean,
+        .cb = drive_backup_cb
          .instance_size = sizeof(BlockdevBackupState),
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e0d5561..731684d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -619,6 +619,14 @@ void backup_start(BlockDriverState *bs,
BlockDriverState *target,
                    BlockCompletionFunc *cb, void *opaque,
                    Error **errp);
+ * backup_transaction_complete
+ * @job The BackupJob that was associated with a transaction

s/BackupJob/backup block job/ or s/BackupJob/backup job/? (there is no
structure named "BackupJob", but this looks like there might be one)

+ * @ret Amalgamated return code for the entire transaction

Hm. The call to this function you're introducing in this patch will
probably stay the only one so there won't be anyone who'll have to worry
about what this means, but if there was, they probably won't reach a
conclusive result.

I know what it means because I've seen patch 3 (right now it means
"everything OR-ed together so it's 0 for success or some non-zero (maybe
positive, maybe negative, depending on whether you have an even or an
odd number of errors, and depending on whether the jobs return negative
values for errors or not) for error"), but I wouldn't be able to infer
it from this. At the least you should add that 0 means success and
everything else means error (if you take my suggestion for patch 3, it
would be 0 for success and -errno for error, where that error number is
one of the errors encountered).

Other than that, looks good (as far as I can tell with my still limited
insights into patch 3).


It's my opinion that this patch should give you insight into patch #3, instead of the other way around. This patch is useful for demonstrating the general flow, because I kept all drive_backup specific concerns strictly separated from patch #3.

It only truly helps me understand patch 3 under the assumption that this patch is correct. For reviewing, I cannot really take that assumption. ;-)

I mean, it does help me with the interfaces patch 3 offers, but it still doesn't help me with the objects introduced in patch 3, for instance, because those are all contained in patch 3 itself. And if I cannot verify patch 3 on its own, I cannot really verify that what this patch does is correct in regards to the interfaces offered there.

I'll write a more comprehensive document for the docs/ folder soon, but the general shape of the idea is "The cleanup actions defined in the .cb() method will be executed after every job in the transactional group has completed."

Right, again, assuming your implementation is correct, but questioning that is part of the natural reviewing process. :-)

But there's some fine print:

"'every job' refers only to jobs that have the capability to register a post-transaction callback, which currently means only drive_backup."

The general approach to this is, mechanically:

(1) Extend the life of objects of interest with reference counts, including Jobs, Bitmaps, and BlkTransactionStates.

(2) "steal" the callback from a BlockJob and, when invoked, update our management information for the transaction group in BlkTransactionList.

(3) Once all jobs we have sent out return, execute the .cb() methods as indicated in the BlkTransactionList.

So, if you were adding a callback to a different QMP transaction:
- Look at new_transaction_wrapper; you'll use this to bump up various references used internally for this whole system, and it'll keep qmp_transaction from being able to delete the list and state objects.

This function will give to you (as a gimmick) a new callback and opaque data structure pointer for you to give to the job that you are starting. I obfuscate this just a bit to make it clear that you should be using this function every time to help manage the transactional state.

- Now, the job will run on its own happy way, When it finishes, it will call transaction_callback, which is the function that "intercepts" the callbacks, and dispatches the original enveloped callbacks. It ditches the original data we used to know how to call the original callback, and begins storing completion information for jobs instead.

- transaction_callback will call blk_transaction_complete to store completion info for one job. In turn, it calls put_blk_transaction_list to decrement the pending jobs count (the "jobs" refcount) and once that hits zero ...

- All of the callback mechanisms associated with each BlockTransactionState are run via blk_run_transaction_callbacks.

- This is where drive_backup_cb is going to get invoked again, and then we will splinter back out into backup-specific concerns, with Jobs and Bitmaps and what not.

Yes, I can see the control flow; my main problem lies in the three different structs introduced in patch 3. I'll tell you why I'm confused by them:

First, there is BlkTransactionList. Sounds to me like it should be a list of transactions. Judging from its members, it is rather a list of operations ("actions") for a single transaction, however. Apparently, there is a double-use for the word "transaction": I'd consider a transaction to be the thing started by the QMP command which is a group of atomic operations. However, that one structure is called BlkTransactionState, but it apparently this is only the state of a single operation in the transaction. Pre-existing, but doesn't make it better. You added comments for its list entry members, with the first one stating "All transactions in the current group" and the second being a subset of that. However, again, I consider these to be operations and not transactions. The group is the transaction.

So we have that adding to my confusion. Furthermore, I think we don't need to list entry members (entry and list_entry) in BlkTransactionState. I think it should be fine to drop the snap_bdrv_states list in qmp_transaction() and just add all BlkTransactionStates to the BlkTransactionList (or however you name it if you rename it) in the preparation while () loop there. I don't think we need to add them only in blk_transaction_complete() (and we should drop that from there if it's been done in qmp_transaction() already, of course). Then we can really just use that list generally as *the* list of operations in a transaction.

Second, there is BlkTransactionData. From the name I cannot judge at all how it is supposed to differ from BlkTransactionState, because, well, it contains some data about a transaction... Maybe, because I consider BlkTransactionState to be misnamed, BlkTransactionData actually contains data about the transaction and not about a single operation? But from looking at the members, I have no idea. It has "opaque" and "ret"... I don't know how that is vital data to the transaction itself (and I don't think it is, it is rather data for the completion callback, I guess?), it only looks like some sort of pattern I have seen for AIO callbacks and the like.

Third, there is BlkTransactionWrapper. I suppose it wraps a transaction...? Or maybe at least an operation, because apparently I can no longer be sure whether a transaction is a transaction or an operation... But there is no transaction or even a single operation in there. It's just a structure describing a callback. I don't know how that is a "transaction wrapper"...

So, I don't understand any of the names given to the objects, and while I might understand what they are used for by looking at them, I have a *really* hard time understanding the code that uses them, because if I don't stare at their definitions all the time, I will immediately forget what they contain and what they are used for because I don't understand the names at all (and there is nothing which explicitly tells me what they are used for either). The fact that all of them are starting with BlkTransaction makes mixing them up even easier.

From the point of view of the "transaction action developer," the interface to the new feature is found via the "new_transaction_wrapper" function, But some care must be taken to make sure this callback actually gets used once it has been requested. The whole thing can be aborted with the "undo_transaction_wrapper" function.

All of the other functions that got added to blockdev.c in Patch #3 are there to assist these two "interface" functions in doing what they claim to.

Everything in patch 4 and 5 just adds more reference count flexibility to things that are only of interest to drive_backup.

Patch 6 allows us to inject our special callback wrapper into the qmp_drive_backup function.

Clear as mud?

Control flow and concept, clear. Implementation, still very muddy, sorry...



+ */
+void backup_transaction_complete(BlockJob *job, int ret);
  void blk_dev_change_media_cb(BlockBackend *blk, bool load);
  bool blk_dev_has_removable_media(BlockBackend *blk);
  void blk_dev_eject_request(BlockBackend *blk, bool force);

reply via email to

[Prev in Thread] Current Thread [Next in Thread]