diff options
Diffstat (limited to 'aio-posix.c')
-rw-r--r-- | aio-posix.c | 194 |
1 files changed, 192 insertions, 2 deletions
diff --git a/aio-posix.c b/aio-posix.c index 0467f23a63..482b316502 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -17,6 +17,9 @@ #include "block/block.h" #include "qemu/queue.h" #include "qemu/sockets.h" +#ifdef CONFIG_EPOLL +#include <sys/epoll.h> +#endif struct AioHandler { @@ -29,6 +32,162 @@ struct AioHandler QLIST_ENTRY(AioHandler) node; }; +#ifdef CONFIG_EPOLL + +/* The fd number threashold to switch to epoll */ +#define EPOLL_ENABLE_THRESHOLD 64 + +static void aio_epoll_disable(AioContext *ctx) +{ + ctx->epoll_available = false; + if (!ctx->epoll_enabled) { + return; + } + ctx->epoll_enabled = false; + close(ctx->epollfd); +} + +static inline int epoll_events_from_pfd(int pfd_events) +{ + return (pfd_events & G_IO_IN ? EPOLLIN : 0) | + (pfd_events & G_IO_OUT ? EPOLLOUT : 0) | + (pfd_events & G_IO_HUP ? EPOLLHUP : 0) | + (pfd_events & G_IO_ERR ? EPOLLERR : 0); +} + +static bool aio_epoll_try_enable(AioContext *ctx) +{ + AioHandler *node; + struct epoll_event event; + + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + int r; + if (node->deleted || !node->pfd.events) { + continue; + } + event.events = epoll_events_from_pfd(node->pfd.events); + event.data.ptr = node; + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event); + if (r) { + return false; + } + } + ctx->epoll_enabled = true; + return true; +} + +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new) +{ + struct epoll_event event; + int r; + + if (!ctx->epoll_enabled) { + return; + } + if (!node->pfd.events) { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } else { + event.data.ptr = node; + event.events = epoll_events_from_pfd(node->pfd.events); + if (is_new) { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } else { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } + } +} + +static int aio_epoll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + AioHandler *node; + int i, ret = 0; + struct epoll_event events[128]; + + assert(npfd == 1); + assert(pfds[0].fd == ctx->epollfd); + if (timeout > 0) { + ret = qemu_poll_ns(pfds, npfd, timeout); + } + if (timeout <= 0 || ret > 0) { + ret = epoll_wait(ctx->epollfd, events, + sizeof(events) / sizeof(events[0]), + timeout); + if (ret <= 0) { + goto out; + } + for (i = 0; i < ret; i++) { + int ev = events[i].events; + node = events[i].data.ptr; + node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) | + (ev & EPOLLOUT ? G_IO_OUT : 0) | + (ev & EPOLLHUP ? G_IO_HUP : 0) | + (ev & EPOLLERR ? G_IO_ERR : 0); + } + } +out: + return ret; +} + +static bool aio_epoll_enabled(AioContext *ctx) +{ + /* Fall back to ppoll when external clients are disabled. */ + return !aio_external_disabled(ctx) && ctx->epoll_enabled; +} + +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + if (!ctx->epoll_available) { + return false; + } + if (aio_epoll_enabled(ctx)) { + return true; + } + if (npfd >= EPOLL_ENABLE_THRESHOLD) { + if (aio_epoll_try_enable(ctx)) { + return true; + } else { + aio_epoll_disable(ctx); + } + } + return false; +} + +#else + +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new) +{ +} + +static int aio_epoll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + assert(false); +} + +static bool aio_epoll_enabled(AioContext *ctx) +{ + return false; +} + +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + return false; +} + +#endif + static AioHandler *find_aio_handler(AioContext *ctx, int fd) { AioHandler *node; @@ -50,6 +209,8 @@ void aio_set_fd_handler(AioContext *ctx, void *opaque) { AioHandler *node; + bool is_new = false; + bool deleted = false; node = find_aio_handler(ctx, fd); @@ -68,7 +229,7 @@ void aio_set_fd_handler(AioContext *ctx, * releasing the walking_handlers lock. */ QLIST_REMOVE(node, node); - g_free(node); + deleted = true; } } } else { @@ -79,6 +240,7 @@ void aio_set_fd_handler(AioContext *ctx, QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); g_source_add_poll(&ctx->source, &node->pfd); + is_new = true; } /* Update handler with latest information */ node->io_read = io_read; @@ -90,7 +252,11 @@ void aio_set_fd_handler(AioContext *ctx, node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0); } + aio_epoll_update(ctx, node, is_new); aio_notify(ctx); + if (deleted) { + g_free(node); + } } void aio_set_event_notifier(AioContext *ctx, @@ -262,6 +428,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* fill pollfds */ QLIST_FOREACH(node, &ctx->aio_handlers, node) { if (!node->deleted && node->pfd.events + && !aio_epoll_enabled(ctx) && aio_node_check(ctx, node->is_external)) { add_pollfd(node); } @@ -273,7 +440,17 @@ bool aio_poll(AioContext *ctx, bool blocking) if (timeout) { aio_context_release(ctx); } - ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout); + if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) { + AioHandler epoll_handler; + + epoll_handler.pfd.fd = ctx->epollfd; + epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR; + npfd = 0; + add_pollfd(&epoll_handler); + ret = aio_epoll(ctx, pollfds, npfd, timeout); + } else { + ret = qemu_poll_ns(pollfds, npfd, timeout); + } if (blocking) { atomic_sub(&ctx->notify_me, 2); } @@ -302,3 +479,16 @@ bool aio_poll(AioContext *ctx, bool blocking) return progress; } + +void aio_context_setup(AioContext *ctx, Error **errp) +{ +#ifdef CONFIG_EPOLL + assert(!ctx->epollfd); + ctx->epollfd = epoll_create1(EPOLL_CLOEXEC); + if (ctx->epollfd == -1) { + ctx->epoll_available = false; + } else { + ctx->epoll_available = true; + } +#endif +} |