qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/3] Make paio subsystem use threadlets infrastr


From: Stefan Hajnoczi
Subject: Re: [Qemu-devel] [PATCH 1/3] Make paio subsystem use threadlets infrastructure
Date: Mon, 8 Nov 2010 21:29:12 +0000

On Mon, Nov 8, 2010 at 2:33 PM, Arun R Bharadwaj
<address@hidden> wrote:
> diff --git a/Makefile.objs b/Makefile.objs
> index cd5a24b..3b7ec27 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -9,6 +9,7 @@ qobject-obj-y += qerror.o
>
>  block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o
>  block-obj-y += nbd.o block.o aio.o aes.o osdep.o qemu-config.o
> +block-obj-$(CONFIG_POSIX) += qemu-thread.o
>  block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>
> @@ -124,7 +125,6 @@ endif
>  common-obj-y += $(addprefix ui/, $(ui-obj-y))
>
>  common-obj-y += iov.o acl.o
> -common-obj-$(CONFIG_THREAD) += qemu-thread.o
>  common-obj-y += notify.o event_notifier.o
>  common-obj-y += qemu-timer.o

This change makes CONFIG_THREAD unused.  The ./configure code that
sets CONFIG_THREAD=y should be removed.

> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
> index 7b862b5..00b2a4e 100644
> --- a/posix-aio-compat.c
> +++ b/posix-aio-compat.c
> @@ -29,7 +29,32 @@
>  #include "block_int.h"
>
>  #include "block/raw-posix-aio.h"
> +#include "qemu-thread.h"
>
> +#define MAX_GLOBAL_THREADS  64
> +#define MIN_GLOBAL_THREADS   8
> +
> +QemuMutex aiocb_mutex;

This variable should be static since it isn't used externally.

> +
> +typedef struct ThreadletQueue
> +{
> +    QemuMutex lock;
> +    QemuCond cond;
> +    int max_threads;
> +    int min_threads;
> +    int cur_threads;
> +    int idle_threads;
> +    QTAILQ_HEAD(, ThreadletWork) request_list;
> +} ThreadletQueue;
> +
> +typedef struct ThreadletWork
> +{
> +    QTAILQ_ENTRY(ThreadletWork) node;
> +    void (*func)(struct ThreadletWork *work);
> +} ThreadletWork;
> +
> +static ThreadletQueue globalqueue;
> +static int globalqueue_init;
>
>  struct qemu_paiocb {
>     BlockDriverAIOCB common;
> @@ -44,13 +69,13 @@ struct qemu_paiocb {
>     int ev_signo;
>     off_t aio_offset;
>
> -    QTAILQ_ENTRY(qemu_paiocb) node;
>     int aio_type;
>     ssize_t ret;
>     int active;
>     struct qemu_paiocb *next;
>
>     int async_context_id;
> +    ThreadletWork work;
>  };
>
>  typedef struct PosixAioState {
> @@ -58,64 +83,169 @@ typedef struct PosixAioState {
>     struct qemu_paiocb *first_aio;
>  } PosixAioState;
>
> +static void *threadlet_worker(void *data)
> +{
> +    ThreadletQueue *queue = data;
>
> -static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> -static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
> -static pthread_t thread_id;
> -static pthread_attr_t attr;
> -static int max_threads = 64;
> -static int cur_threads = 0;
> -static int idle_threads = 0;
> -static QTAILQ_HEAD(, qemu_paiocb) request_list;
> +    qemu_mutex_lock(&queue->lock);
> +    while (1) {
> +        ThreadletWork *work;
> +        int ret = 0;
> +
> +        while (QTAILQ_EMPTY(&queue->request_list) &&
> +               (ret != ETIMEDOUT)) {
> +            /* wait for cond to be signalled or broadcast for 1000s */
> +            ret = qemu_cond_timedwait((&queue->cond),
> +                                         &(queue->lock), 10*100000);
> +        }
>
> -#ifdef CONFIG_PREADV
> -static int preadv_present = 1;
> -#else
> -static int preadv_present = 0;
> -#endif
> +        assert(queue->idle_threads != 0);
> +        if (QTAILQ_EMPTY(&queue->request_list)) {
> +            if (queue->cur_threads > queue->min_threads) {
> +                /* We retain the minimum number of threads */
> +                break;
> +            }
> +        } else {
> +            work = QTAILQ_FIRST(&queue->request_list);
> +            QTAILQ_REMOVE(&queue->request_list, work, node);
>
> -static void die2(int err, const char *what)
> -{
> -    fprintf(stderr, "%s failed: %s\n", what, strerror(err));
> -    abort();
> +            queue->idle_threads--;
> +            qemu_mutex_unlock(&queue->lock);
> +
> +            /* execute the work function */
> +            work->func(work);
> +
> +            qemu_mutex_lock(&queue->lock);
> +            queue->idle_threads++;
> +        }
> +    }
> +
> +    queue->idle_threads--;
> +    queue->cur_threads--;
> +    qemu_mutex_unlock(&queue->lock);
> +
> +    return NULL;
>  }
>
> -static void die(const char *what)
> +static void spawn_threadlet(ThreadletQueue *queue)
>  {
> -    die2(errno, what);
> +    QemuThread thread;
> +
> +    queue->cur_threads++;
> +    queue->idle_threads++;
> +
> +    qemu_thread_create(&thread, threadlet_worker, queue);
>  }
>
> -static void mutex_lock(pthread_mutex_t *mutex)
> +/**
> + * submit_work_to_queue: Submit a new task to a private queue to be
> + *                            executed asynchronously.
> + * @queue: Per-subsystem private queue to which the new task needs
> + *         to be submitted.
> + * @work: Contains information about the task that needs to be submitted.
> + */
> +static void submit_work_to_queue(ThreadletQueue *queue, ThreadletWork *work)
>  {
> -    int ret = pthread_mutex_lock(mutex);
> -    if (ret) die2(ret, "pthread_mutex_lock");
> +    qemu_mutex_lock(&queue->lock);
> +    if (queue->idle_threads == 0 && queue->cur_threads < queue->max_threads) 
> {
> +        spawn_threadlet(queue);
> +    } else {
> +        qemu_cond_signal(&queue->cond);
> +    }
> +    QTAILQ_INSERT_TAIL(&queue->request_list, work, node);
> +    qemu_mutex_unlock(&queue->lock);
>  }
>
> -static void mutex_unlock(pthread_mutex_t *mutex)
> +static void threadlet_queue_init(ThreadletQueue *queue,
> +                          int max_threads, int min_threads);
> +
> +/**
> + * submit_work: Submit to the global queue a new task to be executed
> + *                   asynchronously.
> + * @work: Contains information about the task that needs to be submitted.
> + */
> +static void submit_work(ThreadletWork *work)
>  {
> -    int ret = pthread_mutex_unlock(mutex);
> -    if (ret) die2(ret, "pthread_mutex_unlock");
> +    if (!globalqueue_init) {
> +        threadlet_queue_init(&globalqueue, MAX_GLOBAL_THREADS,
> +                                MIN_GLOBAL_THREADS);
> +        globalqueue_init = 1;
> +    }
> +
> +    submit_work_to_queue(&globalqueue, work);
>  }
>
> -static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
> -                           struct timespec *ts)
> +/**
> + * dequeue_work_on_queue: Cancel a task queued on a Queue.
> + * @queue: The queue containing the task to be cancelled.
> + * @work: Contains the information of the task that needs to be cancelled.
> + *
> + * Returns: 0 if the task is successfully cancelled.
> + *          1 otherwise.
> + */
> +static int dequeue_work_on_queue(ThreadletQueue *queue, ThreadletWork *work)
>  {
> -    int ret = pthread_cond_timedwait(cond, mutex, ts);
> -    if (ret && ret != ETIMEDOUT) die2(ret, "pthread_cond_timedwait");
> +    ThreadletWork *ret_work;
> +    int ret = 1;
> +
> +    qemu_mutex_lock(&queue->lock);
> +    QTAILQ_FOREACH(ret_work, &(queue->request_list), node) {
> +        if (ret_work == work) {
> +            QTAILQ_REMOVE(&queue->request_list, ret_work, node);
> +            ret = 0;
> +            break;
> +        }
> +    }
> +    qemu_mutex_unlock(&queue->lock);
> +
>     return ret;
>  }
>
> -static void cond_signal(pthread_cond_t *cond)
> +/**
> + * dequeue_work: Cancel a task queued on the global queue.
> + * @work: Contains the information of the task that needs to be cancelled.
> + *
> + * Returns: 0 if the task is successfully cancelled.
> + *          1 otherwise.
> + */
> +static int dequeue_work(ThreadletWork *work)
> +{
> +    return dequeue_work_on_queue(&globalqueue, work);
> +}
> +
> +/**
> + * threadlet_queue_init: Initialize a threadlet queue.
> + * @queue: The threadlet queue to be initialized.
> + * @max_threads: Maximum number of threads processing the queue.
> + * @min_threads: Minimum number of threads processing the queue.
> + */
> +static void threadlet_queue_init(ThreadletQueue *queue,
> +                                    int max_threads, int min_threads)
> +{
> +    queue->cur_threads  = 0;
> +    queue->idle_threads = 0;
> +    queue->max_threads  = max_threads;
> +    queue->min_threads  = min_threads;
> +    QTAILQ_INIT(&queue->request_list);
> +    qemu_mutex_init(&queue->lock);
> +    qemu_cond_init(&queue->cond);
> +}
> +
> +#ifdef CONFIG_PREADV
> +static int preadv_present = 1;
> +#else
> +static int preadv_present = 0;
> +#endif
> +
> +static void die2(int err, const char *what)
>  {
> -    int ret = pthread_cond_signal(cond);
> -    if (ret) die2(ret, "pthread_cond_signal");
> +    fprintf(stderr, "%s failed: %s\n", what, strerror(err));
> +    abort();
>  }
>
> -static void thread_create(pthread_t *thread, pthread_attr_t *attr,
> -                          void *(*start_routine)(void*), void *arg)
> +static void die(const char *what)
>  {
> -    int ret = pthread_create(thread, attr, start_routine, arg);
> -    if (ret) die2(ret, "pthread_create");
> +    die2(errno, what);
>  }
>
>  static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
> @@ -301,106 +431,59 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb 
> *aiocb)
>     return nbytes;
>  }
>
> -static void *aio_thread(void *unused)
> +static void aio_thread(ThreadletWork *work)
>  {
>     pid_t pid;
> +    struct qemu_paiocb *aiocb = container_of(work, struct qemu_paiocb, work);
> +    ssize_t ret = 0;
>
>     pid = getpid();
> +    aiocb->active = 1;

aiocb->active needs to be assigned with aiocb_mutex held and then released in
order for this memory write to be visible to other threads after this
line of code.

>
> -    while (1) {
> -        struct qemu_paiocb *aiocb;
> -        ssize_t ret = 0;
> -        qemu_timeval tv;
> -        struct timespec ts;
> -
> -        qemu_gettimeofday(&tv);
> -        ts.tv_sec = tv.tv_sec + 10;
> -        ts.tv_nsec = 0;
> -
> -        mutex_lock(&lock);
> -
> -        while (QTAILQ_EMPTY(&request_list) &&
> -               !(ret == ETIMEDOUT)) {
> -            ret = cond_timedwait(&cond, &lock, &ts);
> -        }
> -
> -        if (QTAILQ_EMPTY(&request_list))
> -            break;
> -
> -        aiocb = QTAILQ_FIRST(&request_list);
> -        QTAILQ_REMOVE(&request_list, aiocb, node);
> -        aiocb->active = 1;
> -        idle_threads--;
> -        mutex_unlock(&lock);
> -
> -        switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
> -        case QEMU_AIO_READ:
> -        case QEMU_AIO_WRITE:
> -            ret = handle_aiocb_rw(aiocb);
> -            break;
> -        case QEMU_AIO_FLUSH:
> -            ret = handle_aiocb_flush(aiocb);
> -            break;
> -        case QEMU_AIO_IOCTL:
> -            ret = handle_aiocb_ioctl(aiocb);
> -            break;
> -        default:
> -            fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
> -            ret = -EINVAL;
> -            break;
> -        }
> -
> -        mutex_lock(&lock);
> -        aiocb->ret = ret;
> -        idle_threads++;
> -        mutex_unlock(&lock);
> -
> -        if (kill(pid, aiocb->ev_signo)) die("kill failed");
> +    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
> +    case QEMU_AIO_READ:
> +    case QEMU_AIO_WRITE:
> +        ret = handle_aiocb_rw(aiocb);
> +        break;
> +    case QEMU_AIO_FLUSH:
> +        ret = handle_aiocb_flush(aiocb);
> +        break;
> +    case QEMU_AIO_IOCTL:
> +        ret = handle_aiocb_ioctl(aiocb);
> +        break;
> +    default:
> +        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
> +        ret = -EINVAL;
> +        break;
>     }
>
> -    idle_threads--;
> -    cur_threads--;
> -    mutex_unlock(&lock);
> -
> -    return NULL;
> -}
> -
> -static void spawn_thread(void)
> -{
> -    sigset_t set, oldset;
> -
> -    cur_threads++;
> -    idle_threads++;
> -
> -    /* block all signals */
> -    if (sigfillset(&set)) die("sigfillset");
> -    if (sigprocmask(SIG_SETMASK, &set, &oldset)) die("sigprocmask");
> +    qemu_mutex_lock(&aiocb_mutex);
> +    aiocb->ret = ret;
> +    qemu_mutex_unlock(&aiocb_mutex);
>
> -    thread_create(&thread_id, &attr, aio_thread, NULL);
> -
> -    if (sigprocmask(SIG_SETMASK, &oldset, NULL)) die("sigprocmask restore");
> +    if (kill(pid, aiocb->ev_signo)) {
> +         die("kill failed");
> +    }
>  }
>
>  static void qemu_paio_submit(struct qemu_paiocb *aiocb)
>  {
> +    qemu_mutex_lock(&aiocb_mutex);
>     aiocb->ret = -EINPROGRESS;
> +    qemu_mutex_unlock(&aiocb_mutex);
>     aiocb->active = 0;
> -    mutex_lock(&lock);
> -    if (idle_threads == 0 && cur_threads < max_threads)
> -        spawn_thread();
> -    QTAILQ_INSERT_TAIL(&request_list, aiocb, node);
> -    mutex_unlock(&lock);
> -    cond_signal(&cond);
> +
> +    aiocb->work.func = aio_thread;
> +    submit_work(&aiocb->work);
>  }
>
>  static ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
>  {
>     ssize_t ret;
>
> -    mutex_lock(&lock);
> +    qemu_mutex_lock(&aiocb_mutex);
>     ret = aiocb->ret;
> -    mutex_unlock(&lock);
> -
> +    qemu_mutex_unlock(&aiocb_mutex);
>     return ret;
>  }
>
> @@ -536,20 +619,20 @@ static void paio_cancel(BlockDriverAIOCB *blockacb)
>     struct qemu_paiocb *acb = (struct qemu_paiocb *)blockacb;
>     int active = 0;
>
> -    mutex_lock(&lock);
>     if (!acb->active) {
> -        QTAILQ_REMOVE(&request_list, acb, node);
> -        acb->ret = -ECANCELED;
> +        if (!dequeue_work(&acb->work)) {
> +            acb->ret = -ECANCELED;
> +         } else {
> +            active = 1;
> +         }
>     } else if (acb->ret == -EINPROGRESS) {
>         active = 1;
>     }
> -    mutex_unlock(&lock);
>
>     if (active) {
>         /* fail safe: if the aio could not be canceled, we wait for
>            it */
> -        while (qemu_paio_error(acb) == EINPROGRESS)
> -            ;
> +        active = qemu_paio_error(acb);
>     }
>
>     paio_remove(acb);

acb->ret is not being consistently accessed with aiocb_mutex held.

We don't wait for the work item to complete if it is active.  This changes the
semantics of paio_cancel() and will break callers who expect the request to be
cancelled/completed when paio_cancel() returns.  Also, we go ahead and free the
acb for a running request which is dangerous because it may be reused and
corrupted.

I think both the active variable and field in qemu_paiocb are unnecessary
because dequeue_work() already deals with inactive work items.  If
dequeue_work() was unsuccessful you need to wait until ret != -EINPROGRESS.

Stefan



reply via email to

[Prev in Thread] Current Thread [Next in Thread]