qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PULL v2 5/7] aio: Introduce aio-epoll.c


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PULL v2 5/7] aio: Introduce aio-epoll.c
Date: Fri, 13 Nov 2015 18:09:53 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.3.0


On 09/11/2015 11:08, Stefan Hajnoczi wrote:
> From: Fam Zheng <address@hidden>
> 
> To minimize code duplication, epoll is hooked into aio-posix's
> aio_poll() instead of rolling its own. This approach also has both
> compile-time and run-time switchability.
> 
> 1) When QEMU starts with a small number of fds in the event loop, ppoll
> is used.
> 
> 2) When QEMU starts with a big number of fds, or when more devices are
> hot plugged, epoll kicks in when the number of fds hits the threshold.
> 
> 3) Some fds may not support epoll, such as tty based stdio. In this
> case, it falls back to ppoll.
> 
> A rough benchmark with scsi-disk on virtio-scsi dataplane (epoll gets
> enabled from 64 onward). Numbers are in MB/s.
> 
> ===============================================
>              |     master     |     epoll
>              |                |
> scsi disks # | read    randrw | read    randrw
> -------------|----------------|----------------
> 1            | 86      36     | 92      45
> 8            | 87      43     | 86      41
> 64           | 71      32     | 70      38
> 128          | 48      24     | 58      31
> 256          | 37      19     | 57      28
> ===============================================
> 
> To comply with aio_{disable,enable}_external, we always use ppoll when
> aio_external_disabled() is true.
> 
> [Removed #ifdef CONFIG_EPOLL around AioContext epollfd field declaration
> since the field is also referenced outside CONFIG_EPOLL code.
> --Stefan]
> 
> Signed-off-by: Fam Zheng <address@hidden>
> Message-id: address@hidden
> Signed-off-by: Stefan Hajnoczi <address@hidden>
> ---
>  aio-posix.c         | 184 
> +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/block/aio.h |   5 ++
>  2 files changed, 188 insertions(+), 1 deletion(-)
> 
> diff --git a/aio-posix.c b/aio-posix.c
> index 5bff3cd..06148a9 100644
> --- a/aio-posix.c
> +++ b/aio-posix.c
> @@ -17,6 +17,9 @@
>  #include "block/block.h"
>  #include "qemu/queue.h"
>  #include "qemu/sockets.h"
> +#ifdef CONFIG_EPOLL
> +#include <sys/epoll.h>
> +#endif
>  
>  struct AioHandler
>  {
> @@ -29,6 +32,162 @@ struct AioHandler
>      QLIST_ENTRY(AioHandler) node;
>  };
>  
> +#ifdef CONFIG_EPOLL
> +
> +/* The fd number threshold to switch to epoll */
> +#define EPOLL_ENABLE_THRESHOLD 64
> +
> +static void aio_epoll_disable(AioContext *ctx)
> +{
> +    ctx->epoll_available = false;
> +    if (!ctx->epoll_enabled) {
> +        return;
> +    }
> +    ctx->epoll_enabled = false;
> +    close(ctx->epollfd);
> +}
> +
> +static inline int epoll_events_from_pfd(int pfd_events)
> +{
> +    return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
> +           (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
> +           (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
> +           (pfd_events & G_IO_ERR ? EPOLLERR : 0);
> +}
> +
> +static bool aio_epoll_try_enable(AioContext *ctx)
> +{
> +    AioHandler *node;
> +    struct epoll_event event;
> +
> +    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
> +        int r;
> +        if (node->deleted || !node->pfd.events) {
> +            continue;
> +        }
> +        event.events = epoll_events_from_pfd(node->pfd.events);
> +        event.data.ptr = node;
> +        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
> +        if (r) {
> +            return false;
> +        }
> +    }
> +    ctx->epoll_enabled = true;
> +    return true;
> +}
> +
> +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
> +{
> +    struct epoll_event event;
> +    int r;
> +
> +    if (!ctx->epoll_enabled) {
> +        return;
> +    }
> +    if (!node->pfd.events) {

Coverity says that node might have been freed by the time you call
aio_epoll_update.  You need to pass node->pfd.fd and node->pfd.events by
value instead, I think, or move the call earlier in aio_set_fd_handler.

Paolo

> +        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event);
> +        if (r) {
> +            aio_epoll_disable(ctx);
> +        }
> +    } else {
> +        event.data.ptr = node;
> +        event.events = epoll_events_from_pfd(node->pfd.events);
> +        if (is_new) {
> +            r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
> +            if (r) {
> +                aio_epoll_disable(ctx);
> +            }
> +        } else {
> +            r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event);
> +            if (r) {
> +                aio_epoll_disable(ctx);
> +            }
> +        }
> +    }
> +}
> +
> +static int aio_epoll(AioContext *ctx, GPollFD *pfds,
> +                     unsigned npfd, int64_t timeout)
> +{
> +    AioHandler *node;
> +    int i, ret = 0;
> +    struct epoll_event events[128];
> +
> +    assert(npfd == 1);
> +    assert(pfds[0].fd == ctx->epollfd);
> +    if (timeout > 0) {
> +        ret = qemu_poll_ns(pfds, npfd, timeout);
> +    }
> +    if (timeout <= 0 || ret > 0) {
> +        ret = epoll_wait(ctx->epollfd, events,
> +                         sizeof(events) / sizeof(events[0]),
> +                         timeout);
> +        if (ret <= 0) {
> +            goto out;
> +        }
> +        for (i = 0; i < ret; i++) {
> +            int ev = events[i].events;
> +            node = events[i].data.ptr;
> +            node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
> +                (ev & EPOLLOUT ? G_IO_OUT : 0) |
> +                (ev & EPOLLHUP ? G_IO_HUP : 0) |
> +                (ev & EPOLLERR ? G_IO_ERR : 0);
> +        }
> +    }
> +out:
> +    return ret;
> +}
> +
> +static bool aio_epoll_enabled(AioContext *ctx)
> +{
> +    /* Fall back to ppoll when external clients are disabled. */
> +    return !aio_external_disabled(ctx) && ctx->epoll_enabled;
> +}
> +
> +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
> +                                 unsigned npfd, int64_t timeout)
> +{
> +    if (!ctx->epoll_available) {
> +        return false;
> +    }
> +    if (aio_epoll_enabled(ctx)) {
> +        return true;
> +    }
> +    if (npfd >= EPOLL_ENABLE_THRESHOLD) {
> +        if (aio_epoll_try_enable(ctx)) {
> +            return true;
> +        } else {
> +            aio_epoll_disable(ctx);
> +        }
> +    }
> +    return false;
> +}
> +
> +#else
> +
> +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
> +{
> +}
> +
> +static int aio_epoll(AioContext *ctx, GPollFD *pfds,
> +                     unsigned npfd, int64_t timeout)
> +{
> +    assert(false);
> +}
> +
> +static bool aio_epoll_enabled(AioContext *ctx)
> +{
> +    return false;
> +}
> +
> +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
> +                          unsigned npfd, int64_t timeout)
> +{
> +    return false;
> +}
> +
> +#endif
> +
>  static AioHandler *find_aio_handler(AioContext *ctx, int fd)
>  {
>      AioHandler *node;
> @@ -50,6 +209,7 @@ void aio_set_fd_handler(AioContext *ctx,
>                          void *opaque)
>  {
>      AioHandler *node;
> +    bool is_new = false;
>  
>      node = find_aio_handler(ctx, fd);
>  
> @@ -79,6 +239,7 @@ void aio_set_fd_handler(AioContext *ctx,
>              QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
>  
>              g_source_add_poll(&ctx->source, &node->pfd);
> +            is_new = true;
>          }
>          /* Update handler with latest information */
>          node->io_read = io_read;
> @@ -90,6 +251,7 @@ void aio_set_fd_handler(AioContext *ctx,
>          node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
>      }
>  
> +    aio_epoll_update(ctx, node, is_new);
>      aio_notify(ctx);
>  }
>  
> @@ -262,6 +424,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
>      /* fill pollfds */
>      QLIST_FOREACH(node, &ctx->aio_handlers, node) {
>          if (!node->deleted && node->pfd.events
> +            && !aio_epoll_enabled(ctx)
>              && aio_node_check(ctx, node->is_external)) {
>              add_pollfd(node);
>          }
> @@ -273,7 +436,17 @@ bool aio_poll(AioContext *ctx, bool blocking)
>      if (timeout) {
>          aio_context_release(ctx);
>      }
> -    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
> +    if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
> +        AioHandler epoll_handler;
> +
> +        epoll_handler.pfd.fd = ctx->epollfd;
> +        epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
> +        npfd = 0;
> +        add_pollfd(&epoll_handler);
> +        ret = aio_epoll(ctx, pollfds, npfd, timeout);
> +    } else  {
> +        ret = qemu_poll_ns(pollfds, npfd, timeout);
> +    }
>      if (blocking) {
>          atomic_sub(&ctx->notify_me, 2);
>      }
> @@ -305,4 +478,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
>  
>  void aio_context_setup(AioContext *ctx, Error **errp)
>  {
> +#ifdef CONFIG_EPOLL
> +    assert(!ctx->epollfd);
> +    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
> +    if (ctx->epollfd == -1) {
> +        ctx->epoll_available = false;
> +    } else {
> +        ctx->epoll_available = true;
> +    }
> +#endif
>  }
> diff --git a/include/block/aio.h b/include/block/aio.h
> index 9ffecf7..e086e3b 100644
> --- a/include/block/aio.h
> +++ b/include/block/aio.h
> @@ -124,6 +124,11 @@ struct AioContext {
>      QEMUTimerListGroup tlg;
>  
>      int external_disable_cnt;
> +
> +    /* epoll(7) state used when built with CONFIG_EPOLL */
> +    int epollfd;
> +    bool epoll_enabled;
> +    bool epoll_available;
>  };
>  
>  /**
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]