qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC PATCH 5/6] job: use global job_mutex to protect struct Job


From: Stefan Hajnoczi
Subject: Re: [RFC PATCH 5/6] job: use global job_mutex to protect struct Job
Date: Thu, 8 Jul 2021 13:56:08 +0100

On Wed, Jul 07, 2021 at 06:58:12PM +0200, Emanuele Giuseppe Esposito wrote:
> This lock is going to replace most of the AioContext locks
> in the job and blockjob, so that a Job can run in an arbitrary
> AioContext.
> 
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> ---
>  include/block/blockjob_int.h |   1 +
>  include/qemu/job.h           |   2 +
>  block/backup.c               |   4 +
>  block/mirror.c               |  11 +-
>  blockdev.c                   |  62 ++++----
>  blockjob.c                   |  67 +++++++--
>  job-qmp.c                    |  55 +++----
>  job.c                        | 284 +++++++++++++++++++++++++++--------
>  qemu-img.c                   |  15 +-
>  9 files changed, 350 insertions(+), 151 deletions(-)
> 
> diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
> index 6633d83da2..8b91126506 100644
> --- a/include/block/blockjob_int.h
> +++ b/include/block/blockjob_int.h
> @@ -53,6 +53,7 @@ struct BlockJobDriver {
>       */
>      void (*attached_aio_context)(BlockJob *job, AioContext *new_context);
>  
> +    /* Called with job mutex *not* held. */
>      void (*set_speed)(BlockJob *job, int64_t speed);
>  };
>  
> diff --git a/include/qemu/job.h b/include/qemu/job.h
> index 4421d08d93..359f4e6b3a 100644
> --- a/include/qemu/job.h
> +++ b/include/qemu/job.h
> @@ -49,6 +49,8 @@ typedef struct Job {
>      /**
>       * The type of this job.
>       * Set it in job_create and just read.
> +     * All calls to the driver functions must be made without holding
> +     * job_mutex, to avoid deadlocks.
>       */
>      const JobDriver *driver;
>  
> diff --git a/block/backup.c b/block/backup.c
> index bd3614ce70..80ce956299 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -315,6 +315,10 @@ static void coroutine_fn backup_pause(Job *job)
>      }
>  }
>  
> +/*
> + * Called with job mutex *not* held (we don't want to call block_copy_kick
> + * with the lock held!)
> + */
>  static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed)
>  {
>      BackupBlockJob *s = container_of(job, BackupBlockJob, common);
> diff --git a/block/mirror.c b/block/mirror.c
> index 49aaaafffa..deefaa6a39 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -1150,9 +1150,11 @@ static void mirror_complete(Job *job, Error **errp)
>      s->should_complete = true;
>  
>      /* If the job is paused, it will be re-entered when it is resumed */
> +    job_lock();
>      if (!job_is_paused(job)) {
> -        job_enter(job);
> +        job_enter_locked(job);
>      }
> +    job_unlock();
>  }
>  
>  static void coroutine_fn mirror_pause(Job *job)
> @@ -1171,10 +1173,13 @@ static bool mirror_drained_poll(BlockJob *job)
>       * from one of our own drain sections, to avoid a deadlock waiting for
>       * ourselves.
>       */
> -    if (!job_is_paused(&s->common.job) && !job_is_cancelled(&s->common.job) &&
> -        !s->in_drain) {
> +    job_lock();
> +    if (!job_is_paused(&s->common.job) &&
> +        !job_is_cancelled_locked(&s->common.job) && !s->in_drain) {
> +        job_unlock();
>          return true;
>      }
> +    job_unlock();
>  
>      return !!s->in_flight;
>  }
> diff --git a/blockdev.c b/blockdev.c
> index 8e2c15370e..9255aea6a2 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -150,9 +150,11 @@ void blockdev_mark_auto_del(BlockBackend *blk)
>              AioContext *aio_context = job_get_aiocontext(&job->job);
>              aio_context_acquire(aio_context);
>  
> +            job_lock();
>              job_cancel(&job->job, false);
>  
>              aio_context_release(aio_context);
> +            job_unlock();

This looks strange. The way it's written suggests there is a reason why
job_unlock() has to be called after aio_context_release(). Can
job_unlock() be called immediately after job_cancel()?

>          }
>      }
>  
> @@ -3309,48 +3311,44 @@ out:
>      aio_context_release(aio_context);
>  }
>  
> -/* Get a block job using its ID and acquire its AioContext */
> -static BlockJob *find_block_job(const char *id, AioContext **aio_context,
> -                                Error **errp)
> +/* Get a block job using its ID and acquire its job_lock */

"its" suggests job_lock is per-Job. I suggest saying something like
"Returns with job_lock held on success" instead.

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]