qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC 1/1] linux-aio: consume events in userspace instea


From: Stefan Hajnoczi
Subject: Re: [Qemu-devel] [RFC 1/1] linux-aio: consume events in userspace instead of calling io_getevents
Date: Thu, 14 Jul 2016 13:18:16 +0100
User-agent: Mutt/1.6.1 (2016-04-27)

On Tue, Jul 12, 2016 at 05:25:39PM +0200, Roman Pen wrote:
> @@ -101,6 +101,60 @@ static void qemu_laio_process_completion(struct 
> qemu_laiocb *laiocb)
>      }
>  }
>  
> +/**
> + * aio_ring buffer which is shared between userspace and kernel.
> + */

Please extend the comment to explain that this is copy-pasted from Linux
fs/aio.c because there is no uapi header for this struct.  Other
userspace programs like fio(1) already use the ring buffer directly, so
we assume it's stable ABI...

> +struct aio_ring {
> +     unsigned        id;     /* kernel internal index number */
> +     unsigned        nr;     /* number of io_events */
> +     unsigned        head;   /* Written to by userland or under ring_lock
> +                              * mutex by aio_read_events_ring(). */

This comment refers to the aio_read_events_ring() kernel function, which
doesn't exist in the QEMU codebase.  Please drop it.

> +     unsigned        tail;
> +
> +     unsigned        magic;
> +     unsigned        compat_features;
> +     unsigned        incompat_features;
> +     unsigned        header_length;  /* size of aio_ring */
> +
> +     struct io_event io_events[0];
> +};
> +
> +/**
> + * io_getevents_peek() - returns the number of completed events
> + *                       and sets a pointer on events array.
> + *
> + * This function does not update the internal ring buffer, only
> + * reads head and tail.  When @events has been processed
> + * io_getevents_commit() must be called.
> + */

Please follow the doc comment style in include/qom/object.h (it's the
GTK Doc style).

> +static int io_getevents_peek(io_context_t aio_ctx, unsigned int max,
> +                             struct io_event **events)

max is unused and can be dropped.

> +{
> +     struct aio_ring *ring = (struct aio_ring*)aio_ctx;
> +     unsigned head = ring->head, tail = ring->tail;
> +     unsigned nr;
> +
> +     nr = tail >= head ? tail - head : ring->nr - head;
> +     *events = ring->io_events + head;
> +     /* To avoid speculative loads of s->events[i] before observing tail.
> +        Paired with smp_wmb() inside linux/fs/aio.c: aio_complete(). */
> +     smp_rmb();
> +
> +     return nr;

head and tail are unsigned.  Any reason to make the return value int
instead of unsigned?

> +}
> +
> +/**
> + * io_getevents_commit() - advances head of a ring buffer and returns
> + *                         1 if there are some events left in a ring.
> + */
> +static int io_getevents_commit(io_context_t aio_ctx, unsigned int nr)

QEMU uses the bool type for boolean values, so the return value should
be bool.

> +{
> +     struct aio_ring *ring = (struct aio_ring*)aio_ctx;
> +
> +     ring->head = (ring->head + nr) % ring->nr;
> +     return (ring->head != ring->tail);
> +}
> +
>  /* The completion BH fetches completed I/O requests and invokes their
>   * callbacks.
>   *
> @@ -118,32 +172,32 @@ static void qemu_laio_completion_bh(void *opaque)
>  
>      /* Fetch more completion events when empty */
>      if (s->event_idx == s->event_max) {
> -        do {
> -            struct timespec ts = { 0 };
> -            s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
> -                                        s->events, &ts);
> -        } while (s->event_max == -EINTR);
> -
> +more:
> +        s->event_max = io_getevents_peek(s->ctx, MAX_EVENTS, &s->events);
>          s->event_idx = 0;
> -        if (s->event_max <= 0) {
> -            s->event_max = 0;
> +        if (s->event_max == 0)
>              return; /* no more events */
> -        }
>      }
>  
>      /* Reschedule so nested event loops see currently pending completions */
>      qemu_bh_schedule(s->completion_bh);
>  
>      /* Process completion events */
> -    while (s->event_idx < s->event_max) {
> -        struct iocb *iocb = s->events[s->event_idx].obj;
> -        struct qemu_laiocb *laiocb =
> +    if (s->event_idx < s->event_max) {
> +        unsigned nr = s->event_max - s->event_idx;
> +
> +        while (s->event_idx < s->event_max) {
> +            struct iocb *iocb = s->events[s->event_idx].obj;
> +            struct qemu_laiocb *laiocb =
>                  container_of(iocb, struct qemu_laiocb, iocb);
>  
> -        laiocb->ret = io_event_ret(&s->events[s->event_idx]);
> -        s->event_idx++;
> +            laiocb->ret = io_event_ret(&s->events[s->event_idx]);
> +            s->event_idx++;
>  
> -        qemu_laio_process_completion(laiocb);
> +            qemu_laio_process_completion(laiocb);
> +        }
> +        if (io_getevents_commit(s->ctx, nr))
> +            goto more;

QEMU always uses curly braces, even if the body of the if statement is
only one line.  scripts/checkpatch.pl should show this.

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]