[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PULL v2 5/7] aio: Introduce aio-epoll.c
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PULL v2 5/7] aio: Introduce aio-epoll.c |
Date: |
Fri, 13 Nov 2015 18:09:53 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.3.0 |
On 09/11/2015 11:08, Stefan Hajnoczi wrote:
> From: Fam Zheng <address@hidden>
>
> To minimize code duplication, epoll is hooked into aio-posix's
> aio_poll() instead of rolling its own. This approach also has both
> compile-time and run-time switchability.
>
> 1) When QEMU starts with a small number of fds in the event loop, ppoll
> is used.
>
> 2) When QEMU starts with a big number of fds, or when more devices are
> hot plugged, epoll kicks in when the number of fds hits the threshold.
>
> 3) Some fds may not support epoll, such as tty based stdio. In this
> case, it falls back to ppoll.
>
> A rough benchmark with scsi-disk on virtio-scsi dataplane (epoll gets
> enabled from 64 onward). Numbers are in MB/s.
>
> ===============================================
> | master | epoll
> | |
> scsi disks # | read randrw | read randrw
> -------------|----------------|----------------
> 1 | 86 36 | 92 45
> 8 | 87 43 | 86 41
> 64 | 71 32 | 70 38
> 128 | 48 24 | 58 31
> 256 | 37 19 | 57 28
> ===============================================
>
> To comply with aio_{disable,enable}_external, we always use ppoll when
> aio_external_disabled() is true.
>
> [Removed #ifdef CONFIG_EPOLL around AioContext epollfd field declaration
> since the field is also referenced outside CONFIG_EPOLL code.
> --Stefan]
>
> Signed-off-by: Fam Zheng <address@hidden>
> Message-id: address@hidden
> Signed-off-by: Stefan Hajnoczi <address@hidden>
> ---
> aio-posix.c | 184
> +++++++++++++++++++++++++++++++++++++++++++++++++++-
> include/block/aio.h | 5 ++
> 2 files changed, 188 insertions(+), 1 deletion(-)
>
> diff --git a/aio-posix.c b/aio-posix.c
> index 5bff3cd..06148a9 100644
> --- a/aio-posix.c
> +++ b/aio-posix.c
> @@ -17,6 +17,9 @@
> #include "block/block.h"
> #include "qemu/queue.h"
> #include "qemu/sockets.h"
> +#ifdef CONFIG_EPOLL
> +#include <sys/epoll.h>
> +#endif
>
> struct AioHandler
> {
> @@ -29,6 +32,162 @@ struct AioHandler
> QLIST_ENTRY(AioHandler) node;
> };
>
> +#ifdef CONFIG_EPOLL
> +
> +/* The fd number threshold to switch to epoll */
> +#define EPOLL_ENABLE_THRESHOLD 64
> +
> +static void aio_epoll_disable(AioContext *ctx)
> +{
> + ctx->epoll_available = false;
> + if (!ctx->epoll_enabled) {
> + return;
> + }
> + ctx->epoll_enabled = false;
> + close(ctx->epollfd);
> +}
> +
> +static inline int epoll_events_from_pfd(int pfd_events)
> +{
> + return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
> + (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
> + (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
> + (pfd_events & G_IO_ERR ? EPOLLERR : 0);
> +}
> +
> +static bool aio_epoll_try_enable(AioContext *ctx)
> +{
> + AioHandler *node;
> + struct epoll_event event;
> +
> + QLIST_FOREACH(node, &ctx->aio_handlers, node) {
> + int r;
> + if (node->deleted || !node->pfd.events) {
> + continue;
> + }
> + event.events = epoll_events_from_pfd(node->pfd.events);
> + event.data.ptr = node;
> + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
> + if (r) {
> + return false;
> + }
> + }
> + ctx->epoll_enabled = true;
> + return true;
> +}
> +
> +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
> +{
> + struct epoll_event event;
> + int r;
> +
> + if (!ctx->epoll_enabled) {
> + return;
> + }
> + if (!node->pfd.events) {
Coverity says that node might have been freed by the time you call
aio_epoll_update. You need to pass node->pfd.fd and node->pfd.events by
value instead, I think, or move the call earlier in aio_set_fd_handler.
Paolo
> + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event);
> + if (r) {
> + aio_epoll_disable(ctx);
> + }
> + } else {
> + event.data.ptr = node;
> + event.events = epoll_events_from_pfd(node->pfd.events);
> + if (is_new) {
> + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
> + if (r) {
> + aio_epoll_disable(ctx);
> + }
> + } else {
> + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event);
> + if (r) {
> + aio_epoll_disable(ctx);
> + }
> + }
> + }
> +}
> +
> +static int aio_epoll(AioContext *ctx, GPollFD *pfds,
> + unsigned npfd, int64_t timeout)
> +{
> + AioHandler *node;
> + int i, ret = 0;
> + struct epoll_event events[128];
> +
> + assert(npfd == 1);
> + assert(pfds[0].fd == ctx->epollfd);
> + if (timeout > 0) {
> + ret = qemu_poll_ns(pfds, npfd, timeout);
> + }
> + if (timeout <= 0 || ret > 0) {
> + ret = epoll_wait(ctx->epollfd, events,
> + sizeof(events) / sizeof(events[0]),
> + timeout);
> + if (ret <= 0) {
> + goto out;
> + }
> + for (i = 0; i < ret; i++) {
> + int ev = events[i].events;
> + node = events[i].data.ptr;
> + node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
> + (ev & EPOLLOUT ? G_IO_OUT : 0) |
> + (ev & EPOLLHUP ? G_IO_HUP : 0) |
> + (ev & EPOLLERR ? G_IO_ERR : 0);
> + }
> + }
> +out:
> + return ret;
> +}
> +
> +static bool aio_epoll_enabled(AioContext *ctx)
> +{
> + /* Fall back to ppoll when external clients are disabled. */
> + return !aio_external_disabled(ctx) && ctx->epoll_enabled;
> +}
> +
> +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
> + unsigned npfd, int64_t timeout)
> +{
> + if (!ctx->epoll_available) {
> + return false;
> + }
> + if (aio_epoll_enabled(ctx)) {
> + return true;
> + }
> + if (npfd >= EPOLL_ENABLE_THRESHOLD) {
> + if (aio_epoll_try_enable(ctx)) {
> + return true;
> + } else {
> + aio_epoll_disable(ctx);
> + }
> + }
> + return false;
> +}
> +
> +#else
> +
> +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
> +{
> +}
> +
> +static int aio_epoll(AioContext *ctx, GPollFD *pfds,
> + unsigned npfd, int64_t timeout)
> +{
> + assert(false);
> +}
> +
> +static bool aio_epoll_enabled(AioContext *ctx)
> +{
> + return false;
> +}
> +
> +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
> + unsigned npfd, int64_t timeout)
> +{
> + return false;
> +}
> +
> +#endif
> +
> static AioHandler *find_aio_handler(AioContext *ctx, int fd)
> {
> AioHandler *node;
> @@ -50,6 +209,7 @@ void aio_set_fd_handler(AioContext *ctx,
> void *opaque)
> {
> AioHandler *node;
> + bool is_new = false;
>
> node = find_aio_handler(ctx, fd);
>
> @@ -79,6 +239,7 @@ void aio_set_fd_handler(AioContext *ctx,
> QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
>
> g_source_add_poll(&ctx->source, &node->pfd);
> + is_new = true;
> }
> /* Update handler with latest information */
> node->io_read = io_read;
> @@ -90,6 +251,7 @@ void aio_set_fd_handler(AioContext *ctx,
> node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
> }
>
> + aio_epoll_update(ctx, node, is_new);
> aio_notify(ctx);
> }
>
> @@ -262,6 +424,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
> /* fill pollfds */
> QLIST_FOREACH(node, &ctx->aio_handlers, node) {
> if (!node->deleted && node->pfd.events
> + && !aio_epoll_enabled(ctx)
> && aio_node_check(ctx, node->is_external)) {
> add_pollfd(node);
> }
> @@ -273,7 +436,17 @@ bool aio_poll(AioContext *ctx, bool blocking)
> if (timeout) {
> aio_context_release(ctx);
> }
> - ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
> + if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
> + AioHandler epoll_handler;
> +
> + epoll_handler.pfd.fd = ctx->epollfd;
> + epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
> + npfd = 0;
> + add_pollfd(&epoll_handler);
> + ret = aio_epoll(ctx, pollfds, npfd, timeout);
> + } else {
> + ret = qemu_poll_ns(pollfds, npfd, timeout);
> + }
> if (blocking) {
> atomic_sub(&ctx->notify_me, 2);
> }
> @@ -305,4 +478,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
>
> void aio_context_setup(AioContext *ctx, Error **errp)
> {
> +#ifdef CONFIG_EPOLL
> + assert(!ctx->epollfd);
> + ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
> + if (ctx->epollfd == -1) {
> + ctx->epoll_available = false;
> + } else {
> + ctx->epoll_available = true;
> + }
> +#endif
> }
> diff --git a/include/block/aio.h b/include/block/aio.h
> index 9ffecf7..e086e3b 100644
> --- a/include/block/aio.h
> +++ b/include/block/aio.h
> @@ -124,6 +124,11 @@ struct AioContext {
> QEMUTimerListGroup tlg;
>
> int external_disable_cnt;
> +
> + /* epoll(7) state used when built with CONFIG_EPOLL */
> + int epollfd;
> + bool epoll_enabled;
> + bool epoll_available;
> };
>
> /**
>
- [Qemu-devel] [PULL v2 0/7] Block patches, Stefan Hajnoczi, 2015/11/09
- [Qemu-devel] [PULL v2 1/7] dataplane: simplify indirect descriptor read, Stefan Hajnoczi, 2015/11/09
- [Qemu-devel] [PULL v2 3/7] aio: Introduce aio_external_disabled, Stefan Hajnoczi, 2015/11/09
- [Qemu-devel] [PULL v2 2/7] dataplane: support non-contigious s/g, Stefan Hajnoczi, 2015/11/09
- [Qemu-devel] [PULL v2 4/7] aio: Introduce aio_context_setup, Stefan Hajnoczi, 2015/11/09
- [Qemu-devel] [PULL v2 5/7] aio: Introduce aio-epoll.c, Stefan Hajnoczi, 2015/11/09
- Re: [Qemu-devel] [PULL v2 5/7] aio: Introduce aio-epoll.c,
Paolo Bonzini <=
- [Qemu-devel] [PULL v2 7/7] blockdev: acquire AioContext in hmp_commit(), Stefan Hajnoczi, 2015/11/09
- [Qemu-devel] [PULL v2 6/7] monitor: add missed aio_context_acquire into vm_completion call, Stefan Hajnoczi, 2015/11/09
- Re: [Qemu-devel] [PULL v2 0/7] Block patches, Peter Maydell, 2015/11/09
- Re: [Qemu-devel] [PULL v2 0/7] Block patches, Peter Maydell, 2015/11/09
- Re: [Qemu-devel] [PULL v2 0/7] Block patches, Marc-André Lureau, 2015/11/09
- Re: [Qemu-devel] [PULL v2 0/7] Block patches, Peter Maydell, 2015/11/09
- Re: [Qemu-devel] [PULL v2 0/7] Block patches, Marc-André Lureau, 2015/11/09
- Re: [Qemu-devel] [PULL v2 0/7] Block patches, Peter Maydell, 2015/11/10
- Re: [Qemu-devel] [PULL v2 0/7] Block patches, Peter Maydell, 2015/11/11