From 0191253ceaf442e595cdd12ce80233b9de28cd13 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 24 Oct 2012 14:58:39 +0200
Subject: janitor: move iovector functions out of cutils.c

This removes the dependency of cutils.c on iov.c, and lets us remove
iov.o from several builds.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile       |   2 +-
 Makefile.objs  |   4 +--
 cutils.c       | 103 ---------------------------------------------------------
 iov.c          | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/Makefile |   2 +-
 5 files changed, 107 insertions(+), 107 deletions(-)

diff --git a/Makefile b/Makefile
index 17e2d58ce5..ca716017cf 100644
--- a/Makefile
+++ b/Makefile
@@ -161,7 +161,7 @@ qemu-img.o: qemu-img-cmds.h
 
 tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
 	qemu-timer-common.o main-loop.o notify.o \
-	iohandler.o cutils.o iov.o async.o error.o
+	iohandler.o cutils.o async.o error.o
 tools-obj-$(CONFIG_POSIX) += compatfd.o
 
 qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
diff --git a/Makefile.objs b/Makefile.objs
index 9eca179903..99a268e7bf 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -92,7 +92,7 @@ common-obj-y += ui/
 common-obj-y += bt-host.o bt-vhci.o
 
 common-obj-y += dma-helpers.o
-common-obj-y += iov.o acl.o
+common-obj-y += acl.o
 common-obj-$(CONFIG_POSIX) += compatfd.o
 common-obj-y += event_notifier.o
 common-obj-y += qemu-timer.o qemu-timer-common.o
@@ -113,7 +113,7 @@ endif
 user-obj-y =
 user-obj-y += envlist.o path.o
 user-obj-y += tcg-runtime.o host-utils.o
-user-obj-y += cutils.o iov.o cache-utils.o
+user-obj-y += cutils.o cache-utils.o
 user-obj-y += module.o
 user-obj-y += qemu-user.o
 user-obj-y += $(trace-obj-y)
diff --git a/cutils.c b/cutils.c
index 8edd8fa13c..6f9f799bd3 100644
--- a/cutils.c
+++ b/cutils.c
@@ -142,109 +142,6 @@ int qemu_fdatasync(int fd)
 #endif
 }
 
-/* io vectors */
-
-void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
-{
-    qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec));
-    qiov->niov = 0;
-    qiov->nalloc = alloc_hint;
-    qiov->size = 0;
-}
-
-void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
-{
-    int i;
-
-    qiov->iov = iov;
-    qiov->niov = niov;
-    qiov->nalloc = -1;
-    qiov->size = 0;
-    for (i = 0; i < niov; i++)
-        qiov->size += iov[i].iov_len;
-}
-
-void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
-{
-    assert(qiov->nalloc != -1);
-
-    if (qiov->niov == qiov->nalloc) {
-        qiov->nalloc = 2 * qiov->nalloc + 1;
-        qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
-    }
-    qiov->iov[qiov->niov].iov_base = base;
-    qiov->iov[qiov->niov].iov_len = len;
-    qiov->size += len;
-    ++qiov->niov;
-}
-
-/*
- * Concatenates (partial) iovecs from src to the end of dst.
- * It starts copying after skipping `soffset' bytes at the
- * beginning of src and adds individual vectors from src to
- * dst copies up to `sbytes' bytes total, or up to the end
- * of src if it comes first.  This way, it is okay to specify
- * very large value for `sbytes' to indicate "up to the end
- * of src".
- * Only vector pointers are processed, not the actual data buffers.
- */
-void qemu_iovec_concat(QEMUIOVector *dst,
-                       QEMUIOVector *src, size_t soffset, size_t sbytes)
-{
-    int i;
-    size_t done;
-    struct iovec *siov = src->iov;
-    assert(dst->nalloc != -1);
-    assert(src->size >= soffset);
-    for (i = 0, done = 0; done < sbytes && i < src->niov; i++) {
-        if (soffset < siov[i].iov_len) {
-            size_t len = MIN(siov[i].iov_len - soffset, sbytes - done);
-            qemu_iovec_add(dst, siov[i].iov_base + soffset, len);
-            done += len;
-            soffset = 0;
-        } else {
-            soffset -= siov[i].iov_len;
-        }
-    }
-    /* return done; */
-}
-
-void qemu_iovec_destroy(QEMUIOVector *qiov)
-{
-    assert(qiov->nalloc != -1);
-
-    qemu_iovec_reset(qiov);
-    g_free(qiov->iov);
-    qiov->nalloc = 0;
-    qiov->iov = NULL;
-}
-
-void qemu_iovec_reset(QEMUIOVector *qiov)
-{
-    assert(qiov->nalloc != -1);
-
-    qiov->niov = 0;
-    qiov->size = 0;
-}
-
-size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
-                         void *buf, size_t bytes)
-{
-    return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
-}
-
-size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
-                           const void *buf, size_t bytes)
-{
-    return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
-}
-
-size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
-                         int fillc, size_t bytes)
-{
-    return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
-}
-
 /*
  * Checks if a buffer is all zeroes
  *
diff --git a/iov.c b/iov.c
index c6a66f0afe..ae17e7dff6 100644
--- a/iov.c
+++ b/iov.c
@@ -228,3 +228,106 @@ void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
         fprintf(fp, "\n");
     }
 }
+
+/* io vectors */
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
+{
+    qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec));
+    qiov->niov = 0;
+    qiov->nalloc = alloc_hint;
+    qiov->size = 0;
+}
+
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
+{
+    int i;
+
+    qiov->iov = iov;
+    qiov->niov = niov;
+    qiov->nalloc = -1;
+    qiov->size = 0;
+    for (i = 0; i < niov; i++)
+        qiov->size += iov[i].iov_len;
+}
+
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
+{
+    assert(qiov->nalloc != -1);
+
+    if (qiov->niov == qiov->nalloc) {
+        qiov->nalloc = 2 * qiov->nalloc + 1;
+        qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
+    }
+    qiov->iov[qiov->niov].iov_base = base;
+    qiov->iov[qiov->niov].iov_len = len;
+    qiov->size += len;
+    ++qiov->niov;
+}
+
+/*
+ * Concatenates (partial) iovecs from src to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src if it comes first.  This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat(QEMUIOVector *dst,
+                       QEMUIOVector *src, size_t soffset, size_t sbytes)
+{
+    int i;
+    size_t done;
+    struct iovec *siov = src->iov;
+    assert(dst->nalloc != -1);
+    assert(src->size >= soffset);
+    for (i = 0, done = 0; done < sbytes && i < src->niov; i++) {
+        if (soffset < siov[i].iov_len) {
+            size_t len = MIN(siov[i].iov_len - soffset, sbytes - done);
+            qemu_iovec_add(dst, siov[i].iov_base + soffset, len);
+            done += len;
+            soffset = 0;
+        } else {
+            soffset -= siov[i].iov_len;
+        }
+    }
+    /* return done; */
+}
+
+void qemu_iovec_destroy(QEMUIOVector *qiov)
+{
+    assert(qiov->nalloc != -1);
+
+    qemu_iovec_reset(qiov);
+    g_free(qiov->iov);
+    qiov->nalloc = 0;
+    qiov->iov = NULL;
+}
+
+void qemu_iovec_reset(QEMUIOVector *qiov)
+{
+    assert(qiov->nalloc != -1);
+
+    qiov->niov = 0;
+    qiov->size = 0;
+}
+
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+                         void *buf, size_t bytes)
+{
+    return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+                           const void *buf, size_t bytes)
+{
+    return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+                         int fillc, size_t bytes)
+{
+    return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
+}
diff --git a/tests/Makefile b/tests/Makefile
index 86c9b79ebe..945c82311e 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -48,7 +48,7 @@ tests/check-qdict$(EXESUF): tests/check-qdict.o qdict.o qfloat.o qint.o qstring.
 tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o
 tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o
 tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y)
-tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y)
+tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) iov.o
 tests/test-iov$(EXESUF): tests/test-iov.o iov.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
-- 
cgit v1.2.3


From f3192e8fb28529dd4fd777f916c437f49098ad39 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 15:40:03 +0100
Subject: build: move cutils.o and qemu-timer-common.o to oslib-obj-y

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile      | 3 +--
 Makefile.objs | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index ca716017cf..4bfeeab80f 100644
--- a/Makefile
+++ b/Makefile
@@ -160,8 +160,7 @@ endif
 qemu-img.o: qemu-img-cmds.h
 
 tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
-	qemu-timer-common.o main-loop.o notify.o \
-	iohandler.o cutils.o async.o error.o
+	main-loop.o notify.o iohandler.o async.o error.o
 tools-obj-$(CONFIG_POSIX) += compatfd.o
 
 qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
diff --git a/Makefile.objs b/Makefile.objs
index 99a268e7bf..9e36166e8b 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -19,7 +19,7 @@ universal-obj-y += $(qom-obj-y)
 
 #######################################################################
 # oslib-obj-y is code depending on the OS (win32 vs posix)
-oslib-obj-y = osdep.o
+oslib-obj-y = osdep.o cutils.o qemu-timer-common.o
 oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o
 oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
 
@@ -41,7 +41,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
-block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
+block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
 block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
 block-obj-y += qemu-progress.o qemu-sockets.o uri.o notify.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
@@ -113,7 +113,7 @@ endif
 user-obj-y =
 user-obj-y += envlist.o path.o
 user-obj-y += tcg-runtime.o host-utils.o
-user-obj-y += cutils.o cache-utils.o
+user-obj-y += cache-utils.o
 user-obj-y += module.o
 user-obj-y += qemu-user.o
 user-obj-y += $(trace-obj-y)
-- 
cgit v1.2.3


From 67d223be90178f7142b4f566358cea446af8df74 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 15:11:30 +0100
Subject: compiler: use weak aliases to provide default definitions

This is simpler and more portable.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch_init.h |  2 +-
 compiler.h  | 11 ++++-------
 qmp.c       |  3 ++-
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch_init.h b/arch_init.h
index d9c572aee8..5fc780c63d 100644
--- a/arch_init.h
+++ b/arch_init.h
@@ -34,6 +34,6 @@ int tcg_available(void);
 int kvm_available(void);
 int xen_available(void);
 
-CpuDefinitionInfoList GCC_WEAK_DECL *arch_query_cpu_definitions(Error **errp);
+CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp);
 
 #endif
diff --git a/compiler.h b/compiler.h
index c734a71c67..58865d65e8 100644
--- a/compiler.h
+++ b/compiler.h
@@ -50,16 +50,13 @@
 #   define __printf__ __gnu_printf__
 #  endif
 # endif
-#if defined(_WIN32)
-#define GCC_WEAK __attribute__((weak))
-#define GCC_WEAK_DECL GCC_WEAK
-#else
-#define GCC_WEAK __attribute__((weak))
-#define GCC_WEAK_DECL
-#endif
+# define QEMU_WEAK_ALIAS(newname, oldname) \
+        typeof(oldname) newname __attribute__((weak, alias (#oldname)))
 #else
 #define GCC_ATTR /**/
 #define GCC_FMT_ATTR(n, m)
+#define QEMU_WEAK_ALIAS(newname, oldname) \
+        _Pragma("weak " #newname "=" #oldname)
 #endif
 
 #endif /* COMPILER_H */
diff --git a/qmp.c b/qmp.c
index 31bc3bfdd1..df952b60bc 100644
--- a/qmp.c
+++ b/qmp.c
@@ -466,11 +466,12 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename,
     return prop_list;
 }
 
-CpuDefinitionInfoList GCC_WEAK *arch_query_cpu_definitions(Error **errp)
+static CpuDefinitionInfoList *default_arch_query_cpu_definitions(Error **errp)
 {
     error_set(errp, QERR_NOT_SUPPORTED);
     return NULL;
 }
+QEMU_WEAK_ALIAS(arch_query_cpu_definitions, default_arch_query_cpu_definitions);
 
 CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
 {
-- 
cgit v1.2.3


From d249e1fc4fda57399fe64b3c0290d5d585fdf2e5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 15:15:15 +0100
Subject: sockets: use weak aliases instead of qemu-tool.c

qemu-tool.c has its own (largeish) set of dependencies.  Weak aliases
can be placed directly where people use them, and do not contribute
to increasing the dependencies of generic utility files.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-sockets.c | 7 +++++++
 qemu-tool.c    | 6 ------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/qemu-sockets.c b/qemu-sockets.c
index cfed9c5a5b..225cd0c162 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -967,3 +967,10 @@ int socket_init(void)
 #endif
     return 0;
 }
+
+static int default_monitor_get_fd(Monitor *mon, const char *name, Error **errp)
+{
+    error_setg(errp, "only QEMU supports file descriptor passing");
+    return -1;
+}
+QEMU_WEAK_ALIAS(monitor_get_fd, default_monitor_get_fd);
diff --git a/qemu-tool.c b/qemu-tool.c
index da4c05aaf7..f2f98138ce 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -38,12 +38,6 @@ const char *qemu_get_vm_name(void)
 
 Monitor *cur_mon;
 
-int monitor_get_fd(Monitor *mon, const char *name, Error **errp)
-{
-    error_setg(errp, "only QEMU supports file descriptor passing");
-    return -1;
-}
-
 void vm_stop(RunState state)
 {
     abort();
-- 
cgit v1.2.3


From 0100fbbe73f1455ac66ec172627ff251a1f25302 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 15:19:18 +0100
Subject: fdsets: use weak aliases instead of qemu-tool.c/qemu-user.c

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cutils.c      |  5 -----
 osdep.c       | 30 ++++++++++++++++++++++++++++++
 qemu-common.h |  1 -
 qemu-tool.c   | 20 --------------------
 qemu-user.c   | 20 --------------------
 5 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/cutils.c b/cutils.c
index 6f9f799bd3..4f0692f78e 100644
--- a/cutils.c
+++ b/cutils.c
@@ -280,11 +280,6 @@ int qemu_parse_fd(const char *param)
     return fd;
 }
 
-int qemu_parse_fdset(const char *param)
-{
-    return qemu_parse_fd(param);
-}
-
 /* round down to the nearest power of 2*/
 int64_t pow2floor(int64_t value)
 {
diff --git a/osdep.c b/osdep.c
index 3b25297a25..0061f7473f 100644
--- a/osdep.c
+++ b/osdep.c
@@ -144,6 +144,11 @@ fail:
     errno = serrno;
     return -1;
 }
+
+static int qemu_parse_fdset(const char *param)
+{
+    return qemu_parse_fd(param);
+}
 #endif
 
 /*
@@ -404,3 +409,28 @@ bool fips_get_state(void)
 {
     return fips_enabled;
 }
+
+
+static int default_fdset_get_fd(int64_t fdset_id, int flags)
+{
+    return -1;
+}
+QEMU_WEAK_ALIAS(monitor_fdset_get_fd, default_fdset_get_fd);
+
+static int default_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
+{
+    return -1;
+}
+QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_add, default_fdset_dup_fd_add);
+
+static int default_fdset_dup_fd_remove(int dup_fd)
+{
+    return -1;
+}
+QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_remove, default_fdset_dup_fd_remove);
+
+static int default_fdset_dup_fd_find(int dup_fd)
+{
+    return -1;
+}
+QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_find, default_fdset_dup_fd_find);
diff --git a/qemu-common.h b/qemu-common.h
index b54612b1a5..36ce522066 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -167,7 +167,6 @@ int qemu_fls(int i);
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
 int qemu_parse_fd(const char *param);
-int qemu_parse_fdset(const char *param);
 
 /*
  * strtosz() suffixes used to specify the default treatment of an
diff --git a/qemu-tool.c b/qemu-tool.c
index f2f98138ce..84273ae077 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -68,26 +68,6 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
 {
 }
 
-int monitor_fdset_get_fd(int64_t fdset_id, int flags)
-{
-    return -1;
-}
-
-int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
-{
-    return -1;
-}
-
-int monitor_fdset_dup_fd_remove(int dup_fd)
-{
-    return -1;
-}
-
-int monitor_fdset_dup_fd_find(int dup_fd)
-{
-    return -1;
-}
-
 int64_t cpu_get_clock(void)
 {
     return qemu_get_clock_ns(rt_clock);
diff --git a/qemu-user.c b/qemu-user.c
index 13fb9ae77b..08ccb0fe8e 100644
--- a/qemu-user.c
+++ b/qemu-user.c
@@ -35,23 +35,3 @@ void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
 void monitor_set_error(Monitor *mon, QError *qerror)
 {
 }
-
-int monitor_fdset_get_fd(int64_t fdset_id, int flags)
-{
-    return -1;
-}
-
-int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
-{
-    return -1;
-}
-
-int monitor_fdset_dup_fd_remove(int dup_fd)
-{
-    return -1;
-}
-
-int monitor_fdset_dup_fd_find(int dup_fd)
-{
-    return -1;
-}
-- 
cgit v1.2.3


From 462016d2da393b743ba97552521378e7de2a4c7f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 15:23:16 +0100
Subject: iohandler: add weak alias in qemu-sockets.c, for qemu-ga

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-sockets.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/qemu-sockets.c b/qemu-sockets.c
index 225cd0c162..f2a637165b 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -974,3 +974,14 @@ static int default_monitor_get_fd(Monitor *mon, const char *name, Error **errp)
     return -1;
 }
 QEMU_WEAK_ALIAS(monitor_get_fd, default_monitor_get_fd);
+
+static int default_qemu_set_fd_handler2(int fd,
+                                        IOCanReadHandler *fd_read_poll,
+                                        IOHandler *fd_read,
+                                        IOHandler *fd_write,
+                                        void *opaque)
+
+{
+    abort();
+}
+QEMU_WEAK_ALIAS(qemu_set_fd_handler2, default_qemu_set_fd_handler2);
-- 
cgit v1.2.3


From dbb5f3802e20af9a9971aa98d27c58839ea79a94 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 18:55:03 +0100
Subject: win32: add weak version of qemu_fd_register

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 oslib-win32.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/oslib-win32.c b/oslib-win32.c
index 51b33e8b20..9ca83df011 100644
--- a/oslib-win32.c
+++ b/oslib-win32.c
@@ -150,3 +150,8 @@ int qemu_get_thread_id(void)
 {
     return GetCurrentThreadId();
 }
+
+static void default_qemu_fd_register(int fd)
+{
+}
+QEMU_WEAK_ALIAS(qemu_fd_register, default_qemu_fd_register);
-- 
cgit v1.2.3


From 744ca8e3754e6808c6b5331d287adc533fca0ad3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 15:26:28 +0100
Subject: qemu-timer: make initialization functions idempotent

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-timer.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/qemu-timer.c b/qemu-timer.c
index 908a1030b6..b71e9a6e62 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -430,9 +430,11 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock, Notifier *notifier)
 
 void init_clocks(void)
 {
-    rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME);
-    vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL);
-    host_clock = qemu_new_clock(QEMU_CLOCK_HOST);
+    if (!rt_clock) {
+        rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME);
+        vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL);
+        host_clock = qemu_new_clock(QEMU_CLOCK_HOST);
+    }
 }
 
 uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts)
@@ -745,6 +747,10 @@ int init_timer_alarm(void)
     struct qemu_alarm_timer *t = NULL;
     int i, err = -1;
 
+    if (alarm_timer) {
+        return 0;
+    }
+
     for (i = 0; alarm_timers[i].name; i++) {
         t = &alarm_timers[i];
 
-- 
cgit v1.2.3


From 172061a0a0d98c974ea8d5ed715195237bc44225 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 15:28:36 +0100
Subject: main-loop: unify qemu_init_main_loop between QEMU and tools

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 main-loop.c |  5 ++++-
 main-loop.h | 10 ----------
 qemu-tool.c |  7 -------
 vl.c        |  5 -----
 4 files changed, 4 insertions(+), 23 deletions(-)

diff --git a/main-loop.c b/main-loop.c
index eb3b6e6253..baefe413d1 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -199,10 +199,13 @@ static int qemu_signal_init(void)
 }
 #endif
 
-int main_loop_init(void)
+int qemu_init_main_loop(void)
 {
     int ret;
 
+    init_clocks();
+    init_timer_alarm();
+
     qemu_mutex_lock_iothread();
     ret = qemu_signal_init();
     if (ret) {
diff --git a/main-loop.h b/main-loop.h
index dce1cd9d7c..91a0aff9c7 100644
--- a/main-loop.h
+++ b/main-loop.h
@@ -42,16 +42,6 @@
  */
 int qemu_init_main_loop(void);
 
-/**
- * main_loop_init: Initializes main loop
- *
- * Internal (but shared for compatibility reasons) initialization routine
- * for the main loop. This should not be used by applications directly,
- * use qemu_init_main_loop() instead.
- *
- */
-int main_loop_init(void);
-
 /**
  * main_loop_wait: Run one iteration of the main loop.
  *
diff --git a/qemu-tool.c b/qemu-tool.c
index 84273ae077..28a4e8d42a 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -92,13 +92,6 @@ void qemu_clock_warp(QEMUClock *clock)
 {
 }
 
-int qemu_init_main_loop(void)
-{
-    init_clocks();
-    init_timer_alarm();
-    return main_loop_init();
-}
-
 void slirp_update_timeout(uint32_t *timeout)
 {
 }
diff --git a/vl.c b/vl.c
index 9f99ef4763..b3186fa425 100644
--- a/vl.c
+++ b/vl.c
@@ -2357,11 +2357,6 @@ static void free_and_trace(gpointer mem)
     free(mem);
 }
 
-int qemu_init_main_loop(void)
-{
-    return main_loop_init();
-}
-
 int main(int argc, char **argv, char **envp)
 {
     int i;
-- 
cgit v1.2.3


From 3f4cdf151145f7eaa3480aea5d81d7def2f85a68 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 18:03:10 +0100
Subject: qemu-tool: do not depend on qemu-timer.c

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-tool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qemu-tool.c b/qemu-tool.c
index 28a4e8d42a..b46631e422 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -70,7 +70,7 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
 
 int64_t cpu_get_clock(void)
 {
-    return qemu_get_clock_ns(rt_clock);
+    return get_clock_realtime();
 }
 
 int64_t cpu_get_icount(void)
-- 
cgit v1.2.3


From 63186e56c8b5f1c988bfb0d707e76f0f0dceef90 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 30 Oct 2012 09:30:17 +0100
Subject: build: opts-visitor is not really part of QAPI

It is only used by QEMU itself, do not build it into the tests.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qapi/Makefile.objs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/qapi/Makefile.objs b/qapi/Makefile.objs
index 5f5846e767..f9bd3b9910 100644
--- a/qapi/Makefile.objs
+++ b/qapi/Makefile.objs
@@ -1,3 +1,5 @@
 qapi-obj-y = qapi-visit-core.o qapi-dealloc-visitor.o qmp-input-visitor.o
 qapi-obj-y += qmp-output-visitor.o qmp-registry.o qmp-dispatch.o
-qapi-obj-y += string-input-visitor.o string-output-visitor.o opts-visitor.o
+qapi-obj-y += string-input-visitor.o string-output-visitor.o
+
+common-obj-y += opts-visitor.o
-- 
cgit v1.2.3


From 136594f19aa6370e77a50bd9bba5db77def6ec8f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 15:46:15 +0100
Subject: build: do not include main loop where it is not actually used

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile       | 6 +++---
 Makefile.objs  | 5 ++---
 tests/Makefile | 8 ++++----
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 4bfeeab80f..3ff63df732 100644
--- a/Makefile
+++ b/Makefile
@@ -160,7 +160,7 @@ endif
 qemu-img.o: qemu-img-cmds.h
 
 tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
-	main-loop.o notify.o iohandler.o async.o error.o
+	main-loop.o iohandler.o error.o
 tools-obj-$(CONFIG_POSIX) += compatfd.o
 
 qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
@@ -169,7 +169,7 @@ qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y)
 
 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
 
-vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) $(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o
+vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) libcacard/vscclient.o
 	$(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS),"  LINK  $@")
 
 fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/virtio-9p-marshal.o oslib-posix.o $(trace-obj-y)
@@ -212,7 +212,7 @@ $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
 $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
 
-qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(tools-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y)
+qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(oslib-obj-y) $(trace-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y)
 
 QEMULIBS=libuser libdis libdis-user
 
diff --git a/Makefile.objs b/Makefile.objs
index 9e36166e8b..54daa9f110 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -228,9 +228,8 @@ universal-obj-y += $(qapi-obj-y)
 ######################################################################
 # guest agent
 
-qga-obj-y = qga/ qemu-ga.o module.o
-qga-obj-$(CONFIG_WIN32) += oslib-win32.o
-qga-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-sockets.o qemu-option.o
+qga-obj-y = qga/ qemu-ga.o module.o qemu-tool.o
+qga-obj-$(CONFIG_POSIX) += qemu-sockets.o qemu-option.o
 
 vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
 
diff --git a/tests/Makefile b/tests/Makefile
index 945c82311e..9bf0765de3 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -36,7 +36,7 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
 	tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \
 	tests/test-qmp-commands.o tests/test-visitor-serialization.o
 
-test-qapi-obj-y =  $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y)
+test-qapi-obj-y =  $(qobject-obj-y) $(qapi-obj-y) qemu-tool.o
 test-qapi-obj-y += tests/test-qapi-visit.o tests/test-qapi-types.o
 test-qapi-obj-y += module.o
 
@@ -47,8 +47,8 @@ tests/check-qstring$(EXESUF): tests/check-qstring.o qstring.o
 tests/check-qdict$(EXESUF): tests/check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o
 tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o
 tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o
-tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y)
-tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) iov.o
+tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) qemu-tool.o
+tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) $(block-obj-y) iov.o
 tests/test-iov$(EXESUF): tests/test-iov.o iov.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
@@ -81,7 +81,7 @@ TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS)))
 QTEST_TARGETS=$(foreach TARGET,$(TARGETS), $(if $(check-qtest-$(TARGET)-y), $(TARGET),))
 check-qtest-$(CONFIG_POSIX)=$(foreach TARGET,$(TARGETS), $(check-qtest-$(TARGET)-y))
 
-qtest-obj-y = tests/libqtest.o $(oslib-obj-y) $(tools-obj-y)
+qtest-obj-y = tests/libqtest.o $(oslib-obj-y)
 $(check-qtest-y): $(qtest-obj-y)
 
 .PHONY: check-help
-- 
cgit v1.2.3


From fc97a652de3e54394ca4d0e5e5d689fd8aba8b6f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 May 2010 17:26:13 +0200
Subject: event_notifier: add Win32 implementation

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile.objs          |  3 ++-
 event_notifier-posix.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
 event_notifier-win32.c | 59 ++++++++++++++++++++++++++++++++++++++++++++
 event_notifier.c       | 67 --------------------------------------------------
 event_notifier.h       | 17 +++++++++++--
 qemu-os-win32.h        |  1 -
 6 files changed, 143 insertions(+), 71 deletions(-)
 create mode 100644 event_notifier-posix.c
 create mode 100644 event_notifier-win32.c
 delete mode 100644 event_notifier.c

diff --git a/Makefile.objs b/Makefile.objs
index 54daa9f110..5b39c33560 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -94,7 +94,8 @@ common-obj-y += bt-host.o bt-vhci.o
 common-obj-y += dma-helpers.o
 common-obj-y += acl.o
 common-obj-$(CONFIG_POSIX) += compatfd.o
-common-obj-y += event_notifier.o
+common-obj-$(CONFIG_POSIX) += event_notifier-posix.o
+common-obj-$(CONFIG_WIN32) += event_notifier-win32.o
 common-obj-y += qemu-timer.o qemu-timer-common.o
 common-obj-y += qtest.o
 common-obj-y += vl.o
diff --git a/event_notifier-posix.c b/event_notifier-posix.c
new file mode 100644
index 0000000000..2c207e1399
--- /dev/null
+++ b/event_notifier-posix.c
@@ -0,0 +1,67 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "event_notifier.h"
+#include "qemu-char.h"
+
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
+void event_notifier_init_fd(EventNotifier *e, int fd)
+{
+    e->fd = fd;
+}
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+#ifdef CONFIG_EVENTFD
+    int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC);
+    if (fd < 0)
+        return -errno;
+    e->fd = fd;
+    return 0;
+#else
+    return -ENOSYS;
+#endif
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+    close(e->fd);
+}
+
+int event_notifier_get_fd(EventNotifier *e)
+{
+    return e->fd;
+}
+
+int event_notifier_set_handler(EventNotifier *e,
+                               EventNotifierHandler *handler)
+{
+    return qemu_set_fd_handler(e->fd, (IOHandler *)handler, NULL, e);
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    uint64_t value = 1;
+    int r = write(e->fd, &value, sizeof(value));
+    return r == sizeof(value);
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+    uint64_t value;
+    int r = read(e->fd, &value, sizeof(value));
+    return r == sizeof(value);
+}
diff --git a/event_notifier-win32.c b/event_notifier-win32.c
new file mode 100644
index 0000000000..c723dadf31
--- /dev/null
+++ b/event_notifier-win32.c
@@ -0,0 +1,59 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "event_notifier.h"
+#include "main-loop.h"
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+    e->event = CreateEvent(NULL, FALSE, FALSE, NULL);
+    assert(e->event);
+    return 0;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+    CloseHandle(e->event);
+}
+
+HANDLE event_notifier_get_handle(EventNotifier *e)
+{
+    return e->event;
+}
+
+int event_notifier_set_handler(EventNotifier *e,
+                               EventNotifierHandler *handler)
+{
+    if (handler) {
+        return qemu_add_wait_object(e->event, (IOHandler *)handler, e);
+    } else {
+        qemu_del_wait_object(e->event, (IOHandler *)handler, e);
+        return 0;
+    }
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    SetEvent(e->event);
+    return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+    int ret = WaitForSingleObject(e->event, 0);
+    if (ret == WAIT_OBJECT_0) {
+        ResetEvent(e->event);
+        return true;
+    }
+    return false;
+}
diff --git a/event_notifier.c b/event_notifier.c
deleted file mode 100644
index 2c207e1399..0000000000
--- a/event_notifier.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * event notifier support
- *
- * Copyright Red Hat, Inc. 2010
- *
- * Authors:
- *  Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu-common.h"
-#include "event_notifier.h"
-#include "qemu-char.h"
-
-#ifdef CONFIG_EVENTFD
-#include <sys/eventfd.h>
-#endif
-
-void event_notifier_init_fd(EventNotifier *e, int fd)
-{
-    e->fd = fd;
-}
-
-int event_notifier_init(EventNotifier *e, int active)
-{
-#ifdef CONFIG_EVENTFD
-    int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC);
-    if (fd < 0)
-        return -errno;
-    e->fd = fd;
-    return 0;
-#else
-    return -ENOSYS;
-#endif
-}
-
-void event_notifier_cleanup(EventNotifier *e)
-{
-    close(e->fd);
-}
-
-int event_notifier_get_fd(EventNotifier *e)
-{
-    return e->fd;
-}
-
-int event_notifier_set_handler(EventNotifier *e,
-                               EventNotifierHandler *handler)
-{
-    return qemu_set_fd_handler(e->fd, (IOHandler *)handler, NULL, e);
-}
-
-int event_notifier_set(EventNotifier *e)
-{
-    uint64_t value = 1;
-    int r = write(e->fd, &value, sizeof(value));
-    return r == sizeof(value);
-}
-
-int event_notifier_test_and_clear(EventNotifier *e)
-{
-    uint64_t value;
-    int r = read(e->fd, &value, sizeof(value));
-    return r == sizeof(value);
-}
diff --git a/event_notifier.h b/event_notifier.h
index f0ec2f2171..b283a497ce 100644
--- a/event_notifier.h
+++ b/event_notifier.h
@@ -15,18 +15,31 @@
 
 #include "qemu-common.h"
 
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
 struct EventNotifier {
+#ifdef _WIN32
+    HANDLE event;
+#else
     int fd;
+#endif
 };
 
 typedef void EventNotifierHandler(EventNotifier *);
 
-void event_notifier_init_fd(EventNotifier *, int fd);
 int event_notifier_init(EventNotifier *, int active);
 void event_notifier_cleanup(EventNotifier *);
-int event_notifier_get_fd(EventNotifier *);
 int event_notifier_set(EventNotifier *);
 int event_notifier_test_and_clear(EventNotifier *);
 int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *);
 
+#ifdef CONFIG_POSIX
+void event_notifier_init_fd(EventNotifier *, int fd);
+int event_notifier_get_fd(EventNotifier *);
+#else
+HANDLE event_notifier_get_handle(EventNotifier *);
+#endif
+
 #endif
diff --git a/qemu-os-win32.h b/qemu-os-win32.h
index 8ba466dbfb..d0e9234d24 100644
--- a/qemu-os-win32.h
+++ b/qemu-os-win32.h
@@ -28,7 +28,6 @@
 
 #include <windows.h>
 #include <winsock2.h>
-#include "main-loop.h"
 
 /* Workaround for older versions of MinGW. */
 #ifndef ECONNREFUSED
-- 
cgit v1.2.3


From d0cc2fbfa607678866475383c508be84818ceb64 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 May 2010 17:17:04 +0200
Subject: event_notifier: enable it to use pipes

This takes the eventfd emulation code from the main loop.  When the
EventNotifier is used for the main loop too, we need this compatibility
code.

Without CONFIG_EVENTFD, event_notifier_get_fd is only usable for the
"read" side of the notifier, for example to set a select() handler.

The return value of event_notifier_set changes to the cleaner 0/-errno.
No caller is actually checking the return value.

Reviewed-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 event_notifier-posix.c | 85 ++++++++++++++++++++++++++++++++++++++++----------
 event_notifier.h       |  3 +-
 2 files changed, 71 insertions(+), 17 deletions(-)

diff --git a/event_notifier-posix.c b/event_notifier-posix.c
index 2c207e1399..6f3239a3fc 100644
--- a/event_notifier-posix.c
+++ b/event_notifier-posix.c
@@ -20,48 +20,101 @@
 
 void event_notifier_init_fd(EventNotifier *e, int fd)
 {
-    e->fd = fd;
+    e->rfd = fd;
+    e->wfd = fd;
 }
 
 int event_notifier_init(EventNotifier *e, int active)
 {
+    int fds[2];
+    int ret;
+
 #ifdef CONFIG_EVENTFD
-    int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC);
-    if (fd < 0)
-        return -errno;
-    e->fd = fd;
-    return 0;
+    ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 #else
-    return -ENOSYS;
+    ret = -1;
+    errno = ENOSYS;
 #endif
+    if (ret >= 0) {
+        e->rfd = e->wfd = ret;
+    } else {
+        if (errno != ENOSYS) {
+            return -errno;
+        }
+        if (qemu_pipe(fds) < 0) {
+            return -errno;
+        }
+        ret = fcntl_setfl(fds[0], O_NONBLOCK);
+        if (ret < 0) {
+            ret = -errno;
+            goto fail;
+        }
+        ret = fcntl_setfl(fds[1], O_NONBLOCK);
+        if (ret < 0) {
+            ret = -errno;
+            goto fail;
+        }
+        e->rfd = fds[0];
+        e->wfd = fds[1];
+    }
+    if (active) {
+        event_notifier_set(e);
+    }
+    return 0;
+
+fail:
+    close(fds[0]);
+    close(fds[1]);
+    return ret;
 }
 
 void event_notifier_cleanup(EventNotifier *e)
 {
-    close(e->fd);
+    if (e->rfd != e->wfd) {
+        close(e->rfd);
+    }
+    close(e->wfd);
 }
 
 int event_notifier_get_fd(EventNotifier *e)
 {
-    return e->fd;
+    return e->rfd;
 }
 
 int event_notifier_set_handler(EventNotifier *e,
                                EventNotifierHandler *handler)
 {
-    return qemu_set_fd_handler(e->fd, (IOHandler *)handler, NULL, e);
+    return qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e);
 }
 
 int event_notifier_set(EventNotifier *e)
 {
-    uint64_t value = 1;
-    int r = write(e->fd, &value, sizeof(value));
-    return r == sizeof(value);
+    static const uint64_t value = 1;
+    ssize_t ret;
+
+    do {
+        ret = write(e->wfd, &value, sizeof(value));
+    } while (ret < 0 && errno == EINTR);
+
+    /* EAGAIN is fine, a read must be pending.  */
+    if (ret < 0 && errno != EAGAIN) {
+        return -errno;
+    }
+    return 0;
 }
 
 int event_notifier_test_and_clear(EventNotifier *e)
 {
-    uint64_t value;
-    int r = read(e->fd, &value, sizeof(value));
-    return r == sizeof(value);
+    int value;
+    ssize_t len;
+    char buffer[512];
+
+    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */
+    value = 0;
+    do {
+        len = read(e->rfd, buffer, sizeof(buffer));
+        value |= (len > 0);
+    } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
+
+    return value;
 }
diff --git a/event_notifier.h b/event_notifier.h
index b283a497ce..88b57af7ce 100644
--- a/event_notifier.h
+++ b/event_notifier.h
@@ -23,7 +23,8 @@ struct EventNotifier {
 #ifdef _WIN32
     HANDLE event;
 #else
-    int fd;
+    int rfd;
+    int wfd;
 #endif
 };
 
-- 
cgit v1.2.3


From 1c53786fbdbdf20a3a6c556e09abb4d63ee7843e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 30 Oct 2012 00:17:12 +0100
Subject: vl: init main loop earlier

Otherwise, chardevs will not be able to create a bottom half as soon
as that will require an AioContext.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 vl.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vl.c b/vl.c
index b3186fa425..f84e969a03 100644
--- a/vl.c
+++ b/vl.c
@@ -3311,6 +3311,12 @@ int main(int argc, char **argv, char **envp)
     }
     loc_set_none();
 
+    qemu_init_cpu_loop();
+    if (qemu_init_main_loop()) {
+        fprintf(stderr, "qemu_init_main_loop failed\n");
+        exit(1);
+    }
+
     if (qemu_opts_foreach(qemu_find_opts("sandbox"), parse_sandbox, NULL, 0)) {
         exit(1);
     }
@@ -3463,12 +3469,6 @@ int main(int argc, char **argv, char **envp)
 
     configure_accelerator();
 
-    qemu_init_cpu_loop();
-    if (qemu_init_main_loop()) {
-        fprintf(stderr, "qemu_init_main_loop failed\n");
-        exit(1);
-    }
-
     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
     if (machine_opts) {
         kernel_filename = qemu_opt_get(machine_opts, "kernel");
-- 
cgit v1.2.3


From b078dc3cfec2d6d037caef91204ebf0a78e7ac06 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Sep 2012 13:43:38 +0200
Subject: aio: change qemu_aio_set_fd_handler to return void

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 aio.c      | 12 +++++-------
 qemu-aio.h | 10 +++++-----
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/aio.c b/aio.c
index c738a4e15d..e062aab2af 100644
--- a/aio.c
+++ b/aio.c
@@ -53,11 +53,11 @@ static AioHandler *find_aio_handler(int fd)
     return NULL;
 }
 
-int qemu_aio_set_fd_handler(int fd,
-                            IOHandler *io_read,
-                            IOHandler *io_write,
-                            AioFlushHandler *io_flush,
-                            void *opaque)
+void qemu_aio_set_fd_handler(int fd,
+                             IOHandler *io_read,
+                             IOHandler *io_write,
+                             AioFlushHandler *io_flush,
+                             void *opaque)
 {
     AioHandler *node;
 
@@ -93,8 +93,6 @@ int qemu_aio_set_fd_handler(int fd,
     }
 
     qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
-
-    return 0;
 }
 
 void qemu_aio_flush(void)
diff --git a/qemu-aio.h b/qemu-aio.h
index bfdd35f02c..27a7e21220 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -60,10 +60,10 @@ bool qemu_aio_wait(void);
  * Code that invokes AIO completion functions should rely on this function
  * instead of qemu_set_fd_handler[2].
  */
-int qemu_aio_set_fd_handler(int fd,
-                            IOHandler *io_read,
-                            IOHandler *io_write,
-                            AioFlushHandler *io_flush,
-                            void *opaque);
+void qemu_aio_set_fd_handler(int fd,
+                             IOHandler *io_read,
+                             IOHandler *io_write,
+                             AioFlushHandler *io_flush,
+                             void *opaque);
 
 #endif
-- 
cgit v1.2.3


From 9958c351eee5b34051fd8061fe24f490ceca1334 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 9 Jun 2012 03:44:00 +0200
Subject: aio: provide platform-independent API

This adds to aio.c a platform-independent API based on EventNotifiers, that
can be used by both POSIX and Win32.

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile.objs |  4 ++--
 aio.c         |  9 +++++++++
 qemu-aio.h    | 19 ++++++++++++++++++-
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index 5b39c33560..98046fc64a 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -46,6 +46,8 @@ block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
 block-obj-y += qemu-progress.o qemu-sockets.o uri.o notify.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
+block-obj-$(CONFIG_POSIX) += event_notifier-posix.o
+block-obj-$(CONFIG_WIN32) += event_notifier-win32.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += block/
 block-obj-y += $(qapi-obj-y) qapi-types.o qapi-visit.o
@@ -94,8 +96,6 @@ common-obj-y += bt-host.o bt-vhci.o
 common-obj-y += dma-helpers.o
 common-obj-y += acl.o
 common-obj-$(CONFIG_POSIX) += compatfd.o
-common-obj-$(CONFIG_POSIX) += event_notifier-posix.o
-common-obj-$(CONFIG_WIN32) += event_notifier-win32.o
 common-obj-y += qemu-timer.o qemu-timer-common.o
 common-obj-y += qtest.o
 common-obj-y += vl.o
diff --git a/aio.c b/aio.c
index e062aab2af..44214e1ffc 100644
--- a/aio.c
+++ b/aio.c
@@ -95,6 +95,15 @@ void qemu_aio_set_fd_handler(int fd,
     qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
 }
 
+void qemu_aio_set_event_notifier(EventNotifier *notifier,
+                                 EventNotifierHandler *io_read,
+                                 AioFlushEventNotifierHandler *io_flush)
+{
+    qemu_aio_set_fd_handler(event_notifier_get_fd(notifier),
+                            (IOHandler *)io_read, NULL,
+                            (AioFlushHandler *)io_flush, notifier);
+}
+
 void qemu_aio_flush(void)
 {
     while (qemu_aio_wait());
diff --git a/qemu-aio.h b/qemu-aio.h
index 27a7e21220..dc416a5239 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -16,6 +16,7 @@
 
 #include "qemu-common.h"
 #include "qemu-char.h"
+#include "event_notifier.h"
 
 typedef struct BlockDriverAIOCB BlockDriverAIOCB;
 typedef void BlockDriverCompletionFunc(void *opaque, int ret);
@@ -39,7 +40,7 @@ void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
 void qemu_aio_release(void *p);
 
 /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
-typedef int (AioFlushHandler)(void *opaque);
+typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
 
 /* Flush any pending AIO operation. This function will block until all
  * outstanding AIO operations have been completed or cancelled. */
@@ -53,6 +54,10 @@ void qemu_aio_flush(void);
  * Return whether there is still any pending AIO operation.  */
 bool qemu_aio_wait(void);
 
+#ifdef CONFIG_POSIX
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushHandler)(void *opaque);
+
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
  * to qemu_set_fd_handler2.  Unlike qemu_set_fd_handler2, these callbacks will
  * be invoked when using either qemu_aio_wait() or qemu_aio_flush().
@@ -65,5 +70,17 @@ void qemu_aio_set_fd_handler(int fd,
                              IOHandler *io_write,
                              AioFlushHandler *io_flush,
                              void *opaque);
+#endif
+
+/* Register an event notifier and associated callbacks.  Behaves very similarly
+ * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
+ * will be invoked when using either qemu_aio_wait() or qemu_aio_flush().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of event_notifier_set_handler.
+ */
+void qemu_aio_set_event_notifier(EventNotifier *notifier,
+                                 EventNotifierHandler *io_read,
+                                 AioFlushEventNotifierHandler *io_flush);
 
 #endif
-- 
cgit v1.2.3


From f627aab1ccea119fd94ca9e9df120cea6aab0c67 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Oct 2012 23:45:23 +0100
Subject: aio: introduce AioContext, move bottom halves there

Start introducing AioContext, which will let us remove globals from
aio.c/async.c, and introduce multiple I/O threads.

The bottom half functions now take an additional AioContext argument.
A bottom half is created with a specific AioContext that remains the
same throughout the lifetime.  qemu_bh_new is just a wrapper that
uses a global context.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 aio.c                 |  2 --
 async.c               | 30 +++++++++----------
 hw/hw.h               |  1 +
 iohandler.c           |  1 +
 linux-aio.c           |  1 +
 main-loop.c           | 18 +++++++++++-
 main-loop.h           | 55 ++---------------------------------
 qemu-aio.h            | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 qemu-char.h           |  1 +
 qemu-common.h         |  1 +
 qemu-coroutine-lock.c |  2 +-
 11 files changed, 119 insertions(+), 72 deletions(-)

diff --git a/aio.c b/aio.c
index 44214e1ffc..7e3fe708d2 100644
--- a/aio.c
+++ b/aio.c
@@ -18,8 +18,6 @@
 #include "qemu-queue.h"
 #include "qemu_socket.h"
 
-typedef struct AioHandler AioHandler;
-
 /* The list of registered AIO handlers */
 static QLIST_HEAD(, AioHandler) aio_handlers;
 
diff --git a/async.c b/async.c
index 85cc6410c5..189ee1beb5 100644
--- a/async.c
+++ b/async.c
@@ -26,9 +26,6 @@
 #include "qemu-aio.h"
 #include "main-loop.h"
 
-/* Anchor of the list of Bottom Halves belonging to the context */
-static struct QEMUBH *first_bh;
-
 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
 
@@ -41,27 +38,26 @@ struct QEMUBH {
     bool deleted;
 };
 
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
     QEMUBH *bh;
     bh = g_malloc0(sizeof(QEMUBH));
     bh->cb = cb;
     bh->opaque = opaque;
-    bh->next = first_bh;
-    first_bh = bh;
+    bh->next = ctx->first_bh;
+    ctx->first_bh = bh;
     return bh;
 }
 
-int qemu_bh_poll(void)
+int aio_bh_poll(AioContext *ctx)
 {
     QEMUBH *bh, **bhp, *next;
     int ret;
-    static int nesting = 0;
 
-    nesting++;
+    ctx->walking_bh++;
 
     ret = 0;
-    for (bh = first_bh; bh; bh = next) {
+    for (bh = ctx->first_bh; bh; bh = next) {
         next = bh->next;
         if (!bh->deleted && bh->scheduled) {
             bh->scheduled = 0;
@@ -72,11 +68,11 @@ int qemu_bh_poll(void)
         }
     }
 
-    nesting--;
+    ctx->walking_bh--;
 
     /* remove deleted bhs */
-    if (!nesting) {
-        bhp = &first_bh;
+    if (!ctx->walking_bh) {
+        bhp = &ctx->first_bh;
         while (*bhp) {
             bh = *bhp;
             if (bh->deleted) {
@@ -120,11 +116,11 @@ void qemu_bh_delete(QEMUBH *bh)
     bh->deleted = 1;
 }
 
-void qemu_bh_update_timeout(uint32_t *timeout)
+void aio_bh_update_timeout(AioContext *ctx, uint32_t *timeout)
 {
     QEMUBH *bh;
 
-    for (bh = first_bh; bh; bh = bh->next) {
+    for (bh = ctx->first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
             if (bh->idle) {
                 /* idle bottom halves will be polled at least
@@ -140,3 +136,7 @@ void qemu_bh_update_timeout(uint32_t *timeout)
     }
 }
 
+AioContext *aio_context_new(void)
+{
+    return g_new0(AioContext, 1);
+}
diff --git a/hw/hw.h b/hw/hw.h
index b337ee3042..f530f6f41a 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -10,6 +10,7 @@
 
 #include "ioport.h"
 #include "irq.h"
+#include "qemu-aio.h"
 #include "qemu-file.h"
 #include "vmstate.h"
 #include "qemu-log.h"
diff --git a/iohandler.c b/iohandler.c
index a2d871bb91..60460a6f88 100644
--- a/iohandler.c
+++ b/iohandler.c
@@ -26,6 +26,7 @@
 #include "qemu-common.h"
 #include "qemu-char.h"
 #include "qemu-queue.h"
+#include "qemu-aio.h"
 #include "main-loop.h"
 
 #ifndef _WIN32
diff --git a/linux-aio.c b/linux-aio.c
index ce9b5d4be8..f3d8ef33ca 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -9,6 +9,7 @@
  */
 #include "qemu-common.h"
 #include "qemu-aio.h"
+#include "qemu-queue.h"
 #include "block/raw-posix-aio.h"
 
 #include <sys/eventfd.h>
diff --git a/main-loop.c b/main-loop.c
index baefe413d1..40fdbd3770 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -26,6 +26,7 @@
 #include "qemu-timer.h"
 #include "slirp/slirp.h"
 #include "main-loop.h"
+#include "qemu-aio.h"
 
 #ifndef _WIN32
 
@@ -199,6 +200,8 @@ static int qemu_signal_init(void)
 }
 #endif
 
+static AioContext *qemu_aio_context;
+
 int qemu_init_main_loop(void)
 {
     int ret;
@@ -218,6 +221,7 @@ int qemu_init_main_loop(void)
         return ret;
     }
 
+    qemu_aio_context = aio_context_new();
     return 0;
 }
 
@@ -481,7 +485,7 @@ int main_loop_wait(int nonblocking)
     if (nonblocking) {
         timeout = 0;
     } else {
-        qemu_bh_update_timeout(&timeout);
+        aio_bh_update_timeout(qemu_aio_context, &timeout);
     }
 
     /* poll any events */
@@ -510,3 +514,15 @@ int main_loop_wait(int nonblocking)
 
     return ret;
 }
+
+/* Functions to operate on the main QEMU AioContext.  */
+
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+{
+    return aio_bh_new(qemu_aio_context, cb, opaque);
+}
+
+int qemu_bh_poll(void)
+{
+    return aio_bh_poll(qemu_aio_context);
+}
diff --git a/main-loop.h b/main-loop.h
index 91a0aff9c7..1d1a56b858 100644
--- a/main-loop.h
+++ b/main-loop.h
@@ -25,6 +25,8 @@
 #ifndef QEMU_MAIN_LOOP_H
 #define QEMU_MAIN_LOOP_H 1
 
+#include "qemu-aio.h"
+
 #define SIG_IPI SIGUSR1
 
 /**
@@ -163,7 +165,6 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
 
 typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
 typedef int IOCanReadHandler(void *opaque);
-typedef void IOHandler(void *opaque);
 
 /**
  * qemu_set_fd_handler2: Register a file descriptor with the main loop
@@ -244,56 +245,6 @@ int qemu_set_fd_handler(int fd,
                         IOHandler *fd_write,
                         void *opaque);
 
-typedef struct QEMUBH QEMUBH;
-typedef void QEMUBHFunc(void *opaque);
-
-/**
- * qemu_bh_new: Allocate a new bottom half structure.
- *
- * Bottom halves are lightweight callbacks whose invocation is guaranteed
- * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
- * is opaque and must be allocated prior to its use.
- */
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
-
-/**
- * qemu_bh_schedule: Schedule a bottom half.
- *
- * Scheduling a bottom half interrupts the main loop and causes the
- * execution of the callback that was passed to qemu_bh_new.
- *
- * Bottom halves that are scheduled from a bottom half handler are instantly
- * invoked.  This can create an infinite loop if a bottom half handler
- * schedules itself.
- *
- * @bh: The bottom half to be scheduled.
- */
-void qemu_bh_schedule(QEMUBH *bh);
-
-/**
- * qemu_bh_cancel: Cancel execution of a bottom half.
- *
- * Canceling execution of a bottom half undoes the effect of calls to
- * qemu_bh_schedule without freeing its resources yet.  While cancellation
- * itself is also wait-free and thread-safe, it can of course race with the
- * loop that executes bottom halves unless you are holding the iothread
- * mutex.  This makes it mostly useless if you are not holding the mutex.
- *
- * @bh: The bottom half to be canceled.
- */
-void qemu_bh_cancel(QEMUBH *bh);
-
-/**
- *qemu_bh_delete: Cancel execution of a bottom half and free its resources.
- *
- * Deleting a bottom half frees the memory that was allocated for it by
- * qemu_bh_new.  It also implies canceling the bottom half if it was
- * scheduled.
- *
- * @bh: The bottom half to be deleted.
- */
-void qemu_bh_delete(QEMUBH *bh);
-
 #ifdef CONFIG_POSIX
 /**
  * qemu_add_child_watch: Register a child process for reaping.
@@ -349,8 +300,8 @@ void qemu_fd_register(int fd);
 void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds);
 void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int rc);
 
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
 void qemu_bh_schedule_idle(QEMUBH *bh);
 int qemu_bh_poll(void);
-void qemu_bh_update_timeout(uint32_t *timeout);
 
 #endif
diff --git a/qemu-aio.h b/qemu-aio.h
index dc416a5239..2ed6ad3723 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -15,7 +15,6 @@
 #define QEMU_AIO_H
 
 #include "qemu-common.h"
-#include "qemu-char.h"
 #include "event_notifier.h"
 
 typedef struct BlockDriverAIOCB BlockDriverAIOCB;
@@ -39,9 +38,87 @@ void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                    BlockDriverCompletionFunc *cb, void *opaque);
 void qemu_aio_release(void *p);
 
+typedef struct AioHandler AioHandler;
+typedef void QEMUBHFunc(void *opaque);
+typedef void IOHandler(void *opaque);
+
+typedef struct AioContext {
+    /* Anchor of the list of Bottom Halves belonging to the context */
+    struct QEMUBH *first_bh;
+
+    /* A simple lock used to protect the first_bh list, and ensure that
+     * no callbacks are removed while we're walking and dispatching callbacks.
+     */
+    int walking_bh;
+} AioContext;
+
 /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
 typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
 
+/**
+ * aio_context_new: Allocate a new AioContext.
+ *
+ * AioContext provide a mini event-loop that can be waited on synchronously.
+ * They also provide bottom halves, a service to execute a piece of code
+ * as soon as possible.
+ */
+AioContext *aio_context_new(void);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure.
+ *
+ * Bottom halves are lightweight callbacks whose invocation is guaranteed
+ * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
+ * is opaque and must be allocated prior to its use.
+ */
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
+/**
+ * aio_bh_poll: Poll bottom halves for an AioContext.
+ *
+ * These are internal functions used by the QEMU main loop.
+ */
+int aio_bh_poll(AioContext *ctx);
+void aio_bh_update_timeout(AioContext *ctx, uint32_t *timeout);
+
+/**
+ * qemu_bh_schedule: Schedule a bottom half.
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked.  This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ */
+void qemu_bh_schedule(QEMUBH *bh);
+
+/**
+ * qemu_bh_cancel: Cancel execution of a bottom half.
+ *
+ * Canceling execution of a bottom half undoes the effect of calls to
+ * qemu_bh_schedule without freeing its resources yet.  While cancellation
+ * itself is also wait-free and thread-safe, it can of course race with the
+ * loop that executes bottom halves unless you are holding the iothread
+ * mutex.  This makes it mostly useless if you are not holding the mutex.
+ *
+ * @bh: The bottom half to be canceled.
+ */
+void qemu_bh_cancel(QEMUBH *bh);
+
+/**
+ *qemu_bh_delete: Cancel execution of a bottom half and free its resources.
+ *
+ * Deleting a bottom half frees the memory that was allocated for it by
+ * qemu_bh_new.  It also implies canceling the bottom half if it was
+ * scheduled.
+ *
+ * @bh: The bottom half to be deleted.
+ */
+void qemu_bh_delete(QEMUBH *bh);
+
 /* Flush any pending AIO operation. This function will block until all
  * outstanding AIO operations have been completed or cancelled. */
 void qemu_aio_flush(void);
diff --git a/qemu-char.h b/qemu-char.h
index 486644b3bd..5087168bd7 100644
--- a/qemu-char.h
+++ b/qemu-char.h
@@ -5,6 +5,7 @@
 #include "qemu-queue.h"
 #include "qemu-option.h"
 #include "qemu-config.h"
+#include "qemu-aio.h"
 #include "qobject.h"
 #include "qstring.h"
 #include "main-loop.h"
diff --git a/qemu-common.h b/qemu-common.h
index 36ce522066..24e13ccb5e 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -14,6 +14,7 @@
 
 typedef struct QEMUTimer QEMUTimer;
 typedef struct QEMUFile QEMUFile;
+typedef struct QEMUBH QEMUBH;
 typedef struct DeviceState DeviceState;
 
 struct Monitor;
diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c
index 26ad76bf50..9dda3f86c9 100644
--- a/qemu-coroutine-lock.c
+++ b/qemu-coroutine-lock.c
@@ -26,7 +26,7 @@
 #include "qemu-coroutine.h"
 #include "qemu-coroutine-int.h"
 #include "qemu-queue.h"
-#include "main-loop.h"
+#include "qemu-aio.h"
 #include "trace.h"
 
 static QTAILQ_HEAD(, Coroutine) unlock_bh_queue =
-- 
cgit v1.2.3


From a915f4bc977c4f3aab08a78023c1303664d1c606 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Sep 2012 12:28:51 +0200
Subject: aio: add I/O handlers to the AioContext interface

With this patch, I/O handlers (including event notifier handlers) can be
attached to a single AioContext.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 aio.c       | 68 ++++++++++++++++++++++++-------------------------------------
 async.c     |  6 ++++++
 main-loop.c | 33 ++++++++++++++++++++++++++++++
 qemu-aio.h  | 42 +++++++++++++++++++++++++++++++-------
 4 files changed, 101 insertions(+), 48 deletions(-)

diff --git a/aio.c b/aio.c
index 7e3fe708d2..c89f1e95c1 100644
--- a/aio.c
+++ b/aio.c
@@ -18,15 +18,6 @@
 #include "qemu-queue.h"
 #include "qemu_socket.h"
 
-/* The list of registered AIO handlers */
-static QLIST_HEAD(, AioHandler) aio_handlers;
-
-/* This is a simple lock used to protect the aio_handlers list.  Specifically,
- * it's used to ensure that no callbacks are removed while we're walking and
- * dispatching callbacks.
- */
-static int walking_handlers;
-
 struct AioHandler
 {
     int fd;
@@ -38,11 +29,11 @@ struct AioHandler
     QLIST_ENTRY(AioHandler) node;
 };
 
-static AioHandler *find_aio_handler(int fd)
+static AioHandler *find_aio_handler(AioContext *ctx, int fd)
 {
     AioHandler *node;
 
-    QLIST_FOREACH(node, &aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
         if (node->fd == fd)
             if (!node->deleted)
                 return node;
@@ -51,21 +42,22 @@ static AioHandler *find_aio_handler(int fd)
     return NULL;
 }
 
-void qemu_aio_set_fd_handler(int fd,
-                             IOHandler *io_read,
-                             IOHandler *io_write,
-                             AioFlushHandler *io_flush,
-                             void *opaque)
+void aio_set_fd_handler(AioContext *ctx,
+                        int fd,
+                        IOHandler *io_read,
+                        IOHandler *io_write,
+                        AioFlushHandler *io_flush,
+                        void *opaque)
 {
     AioHandler *node;
 
-    node = find_aio_handler(fd);
+    node = find_aio_handler(ctx, fd);
 
     /* Are we deleting the fd handler? */
     if (!io_read && !io_write) {
         if (node) {
             /* If the lock is held, just mark the node as deleted */
-            if (walking_handlers)
+            if (ctx->walking_handlers)
                 node->deleted = 1;
             else {
                 /* Otherwise, delete it for real.  We can't just mark it as
@@ -81,7 +73,7 @@ void qemu_aio_set_fd_handler(int fd,
             /* Alloc and insert if it's not already there */
             node = g_malloc0(sizeof(AioHandler));
             node->fd = fd;
-            QLIST_INSERT_HEAD(&aio_handlers, node, node);
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
         }
         /* Update handler with latest information */
         node->io_read = io_read;
@@ -89,25 +81,19 @@ void qemu_aio_set_fd_handler(int fd,
         node->io_flush = io_flush;
         node->opaque = opaque;
     }
-
-    qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
-}
-
-void qemu_aio_set_event_notifier(EventNotifier *notifier,
-                                 EventNotifierHandler *io_read,
-                                 AioFlushEventNotifierHandler *io_flush)
-{
-    qemu_aio_set_fd_handler(event_notifier_get_fd(notifier),
-                            (IOHandler *)io_read, NULL,
-                            (AioFlushHandler *)io_flush, notifier);
 }
 
-void qemu_aio_flush(void)
+void aio_set_event_notifier(AioContext *ctx,
+                            EventNotifier *notifier,
+                            EventNotifierHandler *io_read,
+                            AioFlushEventNotifierHandler *io_flush)
 {
-    while (qemu_aio_wait());
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
+                       (IOHandler *)io_read, NULL,
+                       (AioFlushHandler *)io_flush, notifier);
 }
 
-bool qemu_aio_wait(void)
+bool aio_wait(AioContext *ctx)
 {
     AioHandler *node;
     fd_set rdfds, wrfds;
@@ -120,18 +106,18 @@ bool qemu_aio_wait(void)
      * Do not call select in this case, because it is possible that the caller
      * does not need a complete flush (as is the case for qemu_aio_wait loops).
      */
-    if (qemu_bh_poll()) {
+    if (aio_bh_poll(ctx)) {
         return true;
     }
 
-    walking_handlers++;
+    ctx->walking_handlers++;
 
     FD_ZERO(&rdfds);
     FD_ZERO(&wrfds);
 
     /* fill fd sets */
     busy = false;
-    QLIST_FOREACH(node, &aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
         /* If there aren't pending AIO operations, don't invoke callbacks.
          * Otherwise, if there are no AIO requests, qemu_aio_wait() would
          * wait indefinitely.
@@ -152,7 +138,7 @@ bool qemu_aio_wait(void)
         }
     }
 
-    walking_handlers--;
+    ctx->walking_handlers--;
 
     /* No AIO operations?  Get us out of here */
     if (!busy) {
@@ -166,11 +152,11 @@ bool qemu_aio_wait(void)
     if (ret > 0) {
         /* we have to walk very carefully in case
          * qemu_aio_set_fd_handler is called while we're walking */
-        node = QLIST_FIRST(&aio_handlers);
+        node = QLIST_FIRST(&ctx->aio_handlers);
         while (node) {
             AioHandler *tmp;
 
-            walking_handlers++;
+            ctx->walking_handlers++;
 
             if (!node->deleted &&
                 FD_ISSET(node->fd, &rdfds) &&
@@ -186,9 +172,9 @@ bool qemu_aio_wait(void)
             tmp = node;
             node = QLIST_NEXT(node, node);
 
-            walking_handlers--;
+            ctx->walking_handlers--;
 
-            if (!walking_handlers && tmp->deleted) {
+            if (!ctx->walking_handlers && tmp->deleted) {
                 QLIST_REMOVE(tmp, node);
                 g_free(tmp);
             }
diff --git a/async.c b/async.c
index 189ee1beb5..c99db79ac7 100644
--- a/async.c
+++ b/async.c
@@ -136,7 +136,13 @@ void aio_bh_update_timeout(AioContext *ctx, uint32_t *timeout)
     }
 }
 
+
 AioContext *aio_context_new(void)
 {
     return g_new0(AioContext, 1);
 }
+
+void aio_flush(AioContext *ctx)
+{
+    while (aio_wait(ctx));
+}
diff --git a/main-loop.c b/main-loop.c
index 40fdbd3770..8f0117e7aa 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -526,3 +526,36 @@ int qemu_bh_poll(void)
 {
     return aio_bh_poll(qemu_aio_context);
 }
+
+void qemu_aio_flush(void)
+{
+    aio_flush(qemu_aio_context);
+}
+
+bool qemu_aio_wait(void)
+{
+    return aio_wait(qemu_aio_context);
+}
+
+void qemu_aio_set_fd_handler(int fd,
+                             IOHandler *io_read,
+                             IOHandler *io_write,
+                             AioFlushHandler *io_flush,
+                             void *opaque)
+{
+    aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, io_flush,
+                       opaque);
+
+    qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
+}
+
+#ifdef CONFIG_POSIX
+void qemu_aio_set_event_notifier(EventNotifier *notifier,
+                                 EventNotifierHandler *io_read,
+                                 AioFlushEventNotifierHandler *io_flush)
+{
+    qemu_aio_set_fd_handler(event_notifier_get_fd(notifier),
+                            (IOHandler *)io_read, NULL,
+                            (AioFlushHandler *)io_flush, notifier);
+}
+#endif
diff --git a/qemu-aio.h b/qemu-aio.h
index 2ed6ad3723..f8a93d8fb9 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -15,6 +15,7 @@
 #define QEMU_AIO_H
 
 #include "qemu-common.h"
+#include "qemu-queue.h"
 #include "event_notifier.h"
 
 typedef struct BlockDriverAIOCB BlockDriverAIOCB;
@@ -43,6 +44,15 @@ typedef void QEMUBHFunc(void *opaque);
 typedef void IOHandler(void *opaque);
 
 typedef struct AioContext {
+    /* The list of registered AIO handlers */
+    QLIST_HEAD(, AioHandler) aio_handlers;
+
+    /* This is a simple lock used to protect the aio_handlers list.
+     * Specifically, it's used to ensure that no callbacks are removed while
+     * we're walking and dispatching callbacks.
+     */
+    int walking_handlers;
+
     /* Anchor of the list of Bottom Halves belonging to the context */
     struct QEMUBH *first_bh;
 
@@ -121,7 +131,7 @@ void qemu_bh_delete(QEMUBH *bh);
 
 /* Flush any pending AIO operation. This function will block until all
  * outstanding AIO operations have been completed or cancelled. */
-void qemu_aio_flush(void);
+void aio_flush(AioContext *ctx);
 
 /* Wait for a single AIO completion to occur.  This function will wait
  * until a single AIO event has completed and it will ensure something
@@ -129,7 +139,7 @@ void qemu_aio_flush(void);
  * result of executing I/O completion or bh callbacks.
  *
  * Return whether there is still any pending AIO operation.  */
-bool qemu_aio_wait(void);
+bool aio_wait(AioContext *ctx);
 
 #ifdef CONFIG_POSIX
 /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
@@ -142,11 +152,12 @@ typedef int (AioFlushHandler)(void *opaque);
  * Code that invokes AIO completion functions should rely on this function
  * instead of qemu_set_fd_handler[2].
  */
-void qemu_aio_set_fd_handler(int fd,
-                             IOHandler *io_read,
-                             IOHandler *io_write,
-                             AioFlushHandler *io_flush,
-                             void *opaque);
+void aio_set_fd_handler(AioContext *ctx,
+                        int fd,
+                        IOHandler *io_read,
+                        IOHandler *io_write,
+                        AioFlushHandler *io_flush,
+                        void *opaque);
 #endif
 
 /* Register an event notifier and associated callbacks.  Behaves very similarly
@@ -156,8 +167,25 @@ void qemu_aio_set_fd_handler(int fd,
  * Code that invokes AIO completion functions should rely on this function
  * instead of event_notifier_set_handler.
  */
+void aio_set_event_notifier(AioContext *ctx,
+                            EventNotifier *notifier,
+                            EventNotifierHandler *io_read,
+                            AioFlushEventNotifierHandler *io_flush);
+
+/* Functions to operate on the main QEMU AioContext.  */
+
+void qemu_aio_flush(void);
+bool qemu_aio_wait(void);
 void qemu_aio_set_event_notifier(EventNotifier *notifier,
                                  EventNotifierHandler *io_read,
                                  AioFlushEventNotifierHandler *io_flush);
 
+#ifdef CONFIG_POSIX
+void qemu_aio_set_fd_handler(int fd,
+                             IOHandler *io_read,
+                             IOHandler *io_write,
+                             AioFlushHandler *io_flush,
+                             void *opaque);
+#endif
+
 #endif
-- 
cgit v1.2.3


From 4231c88d27d9e46e6ad6e6b7bbb6e442bcf9cd05 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 26 Sep 2012 15:21:36 +0200
Subject: aio: test node->deleted before calling io_flush

Otherwise, there could be a case where io_flush accesses freed
memory because it should not have been called.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 aio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aio.c b/aio.c
index c89f1e95c1..734d2cfa0b 100644
--- a/aio.c
+++ b/aio.c
@@ -122,7 +122,7 @@ bool aio_wait(AioContext *ctx)
          * Otherwise, if there are no AIO requests, qemu_aio_wait() would
          * wait indefinitely.
          */
-        if (node->io_flush) {
+        if (!node->deleted && node->io_flush) {
             if (node->io_flush(node->opaque) == 0) {
                 continue;
             }
-- 
cgit v1.2.3


From 7c0628b20e7c56b7e04abb8b5f8d7da3f7cb87e8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 Sep 2012 14:37:53 +0200
Subject: aio: add non-blocking variant of aio_wait

This will be used when polling the GSource attached to an AioContext.

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 aio.c       | 20 +++++++++++++++-----
 async.c     |  2 +-
 main-loop.c |  2 +-
 qemu-aio.h  | 21 +++++++++++++++------
 4 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/aio.c b/aio.c
index 734d2cfa0b..1d5e0c62ce 100644
--- a/aio.c
+++ b/aio.c
@@ -93,13 +93,16 @@ void aio_set_event_notifier(AioContext *ctx,
                        (AioFlushHandler *)io_flush, notifier);
 }
 
-bool aio_wait(AioContext *ctx)
+bool aio_poll(AioContext *ctx, bool blocking)
 {
+    static struct timeval tv0;
     AioHandler *node;
     fd_set rdfds, wrfds;
     int max_fd = -1;
     int ret;
-    bool busy;
+    bool busy, progress;
+
+    progress = false;
 
     /*
      * If there are callbacks left that have been queued, we need to call then.
@@ -107,6 +110,11 @@ bool aio_wait(AioContext *ctx)
      * does not need a complete flush (as is the case for qemu_aio_wait loops).
      */
     if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    if (progress && !blocking) {
         return true;
     }
 
@@ -142,11 +150,11 @@ bool aio_wait(AioContext *ctx)
 
     /* No AIO operations?  Get us out of here */
     if (!busy) {
-        return false;
+        return progress;
     }
 
     /* wait until next event */
-    ret = select(max_fd, &rdfds, &wrfds, NULL, NULL);
+    ret = select(max_fd, &rdfds, &wrfds, NULL, blocking ? NULL : &tv0);
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
@@ -161,11 +169,13 @@ bool aio_wait(AioContext *ctx)
             if (!node->deleted &&
                 FD_ISSET(node->fd, &rdfds) &&
                 node->io_read) {
+                progress = true;
                 node->io_read(node->opaque);
             }
             if (!node->deleted &&
                 FD_ISSET(node->fd, &wrfds) &&
                 node->io_write) {
+                progress = true;
                 node->io_write(node->opaque);
             }
 
@@ -181,5 +191,5 @@ bool aio_wait(AioContext *ctx)
         }
     }
 
-    return true;
+    return progress;
 }
diff --git a/async.c b/async.c
index c99db79ac7..513bdd7aa2 100644
--- a/async.c
+++ b/async.c
@@ -144,5 +144,5 @@ AioContext *aio_context_new(void)
 
 void aio_flush(AioContext *ctx)
 {
-    while (aio_wait(ctx));
+    while (aio_poll(ctx, true));
 }
diff --git a/main-loop.c b/main-loop.c
index 8f0117e7aa..1fdc3bdf2e 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -534,7 +534,7 @@ void qemu_aio_flush(void)
 
 bool qemu_aio_wait(void)
 {
-    return aio_wait(qemu_aio_context);
+    return aio_poll(qemu_aio_context, true);
 }
 
 void qemu_aio_set_fd_handler(int fd,
diff --git a/qemu-aio.h b/qemu-aio.h
index f8a93d8fb9..f19201e7ca 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -133,13 +133,22 @@ void qemu_bh_delete(QEMUBH *bh);
  * outstanding AIO operations have been completed or cancelled. */
 void aio_flush(AioContext *ctx);
 
-/* Wait for a single AIO completion to occur.  This function will wait
- * until a single AIO event has completed and it will ensure something
- * has moved before returning. This can issue new pending aio as
- * result of executing I/O completion or bh callbacks.
+/* Progress in completing AIO work to occur.  This can issue new pending
+ * aio as a result of executing I/O completion or bh callbacks.
  *
- * Return whether there is still any pending AIO operation.  */
-bool aio_wait(AioContext *ctx);
+ * If there is no pending AIO operation or completion (bottom half),
+ * return false.  If there are pending bottom halves, return true.
+ *
+ * If there are no pending bottom halves, but there are pending AIO
+ * operations, it may not be possible to make any progress without
+ * blocking.  If @blocking is true, this function will wait until one
+ * or more AIO events have completed, to ensure something has moved
+ * before returning.
+ *
+ * If @blocking is false, this function will also return false if the
+ * function cannot make any progress without blocking.
+ */
+bool aio_poll(AioContext *ctx, bool blocking);
 
 #ifdef CONFIG_POSIX
 /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
-- 
cgit v1.2.3


From cd9ba1ebcf0439457f22b75b38533f6634f23c5f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 Sep 2012 14:57:22 +0200
Subject: aio: prepare for introducing GSource-based dispatch

This adds a GPollFD to each AioHandler.  It will then be possible to
attach these GPollFDs to a GSource, and from there to the main loop.
aio_wait examines the GPollFDs and avoids calling select() if any
is set (similar to what it does if bottom halves are available).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 aio.c      | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 qemu-aio.h |  7 +++++
 2 files changed, 87 insertions(+), 13 deletions(-)

diff --git a/aio.c b/aio.c
index 1d5e0c62ce..44247224e2 100644
--- a/aio.c
+++ b/aio.c
@@ -20,7 +20,7 @@
 
 struct AioHandler
 {
-    int fd;
+    GPollFD pfd;
     IOHandler *io_read;
     IOHandler *io_write;
     AioFlushHandler *io_flush;
@@ -34,7 +34,7 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
     AioHandler *node;
 
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->fd == fd)
+        if (node->pfd.fd == fd)
             if (!node->deleted)
                 return node;
     }
@@ -57,9 +57,10 @@ void aio_set_fd_handler(AioContext *ctx,
     if (!io_read && !io_write) {
         if (node) {
             /* If the lock is held, just mark the node as deleted */
-            if (ctx->walking_handlers)
+            if (ctx->walking_handlers) {
                 node->deleted = 1;
-            else {
+                node->pfd.revents = 0;
+            } else {
                 /* Otherwise, delete it for real.  We can't just mark it as
                  * deleted because deleted nodes are only cleaned up after
                  * releasing the walking_handlers lock.
@@ -72,7 +73,7 @@ void aio_set_fd_handler(AioContext *ctx,
         if (node == NULL) {
             /* Alloc and insert if it's not already there */
             node = g_malloc0(sizeof(AioHandler));
-            node->fd = fd;
+            node->pfd.fd = fd;
             QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
         }
         /* Update handler with latest information */
@@ -80,6 +81,9 @@ void aio_set_fd_handler(AioContext *ctx,
         node->io_write = io_write;
         node->io_flush = io_flush;
         node->opaque = opaque;
+
+        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP : 0);
+        node->pfd.events |= (io_write ? G_IO_OUT : 0);
     }
 }
 
@@ -93,6 +97,32 @@ void aio_set_event_notifier(AioContext *ctx,
                        (AioFlushHandler *)io_flush, notifier);
 }
 
+bool aio_pending(AioContext *ctx)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        int revents;
+
+        /*
+         * FIXME: right now we cannot get G_IO_HUP and G_IO_ERR because
+         * main-loop.c is still select based (due to the slirp legacy).
+         * If main-loop.c ever switches to poll, G_IO_ERR should be
+         * tested too.  Dispatching G_IO_ERR to both handlers should be
+         * okay, since handlers need to be ready for spurious wakeups.
+         */
+        revents = node->pfd.revents & node->pfd.events;
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
+            return true;
+        }
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     static struct timeval tv0;
@@ -114,6 +144,43 @@ bool aio_poll(AioContext *ctx, bool blocking)
         progress = true;
     }
 
+    /*
+     * Then dispatch any pending callbacks from the GSource.
+     *
+     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * called while we're walking.
+     */
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
+        int revents;
+
+        ctx->walking_handlers++;
+
+        revents = node->pfd.revents & node->pfd.events;
+        node->pfd.revents = 0;
+
+        /* See comment in aio_pending.  */
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
+            node->io_read(node->opaque);
+            progress = true;
+        }
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
+            node->io_write(node->opaque);
+            progress = true;
+        }
+
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
+        }
+    }
+
     if (progress && !blocking) {
         return true;
     }
@@ -137,12 +204,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
             busy = true;
         }
         if (!node->deleted && node->io_read) {
-            FD_SET(node->fd, &rdfds);
-            max_fd = MAX(max_fd, node->fd + 1);
+            FD_SET(node->pfd.fd, &rdfds);
+            max_fd = MAX(max_fd, node->pfd.fd + 1);
         }
         if (!node->deleted && node->io_write) {
-            FD_SET(node->fd, &wrfds);
-            max_fd = MAX(max_fd, node->fd + 1);
+            FD_SET(node->pfd.fd, &wrfds);
+            max_fd = MAX(max_fd, node->pfd.fd + 1);
         }
     }
 
@@ -167,16 +234,16 @@ bool aio_poll(AioContext *ctx, bool blocking)
             ctx->walking_handlers++;
 
             if (!node->deleted &&
-                FD_ISSET(node->fd, &rdfds) &&
+                FD_ISSET(node->pfd.fd, &rdfds) &&
                 node->io_read) {
-                progress = true;
                 node->io_read(node->opaque);
+                progress = true;
             }
             if (!node->deleted &&
-                FD_ISSET(node->fd, &wrfds) &&
+                FD_ISSET(node->pfd.fd, &wrfds) &&
                 node->io_write) {
-                progress = true;
                 node->io_write(node->opaque);
+                progress = true;
             }
 
             tmp = node;
diff --git a/qemu-aio.h b/qemu-aio.h
index f19201e7ca..ac248962fe 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -133,6 +133,13 @@ void qemu_bh_delete(QEMUBH *bh);
  * outstanding AIO operations have been completed or cancelled. */
 void aio_flush(AioContext *ctx);
 
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_pending(AioContext *ctx);
+
 /* Progress in completing AIO work to occur.  This can issue new pending
  * aio as a result of executing I/O completion or bh callbacks.
  *
-- 
cgit v1.2.3


From f42b22077bc63a482d7a8755b54e33475ab78541 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 9 Jun 2012 04:01:51 +0200
Subject: aio: add Win32 implementation

The Win32 implementation will only accept EventNotifiers, thus a few
drivers are disabled under Windows.  EventNotifiers are a good match
for the GSource implementation, too, because the Win32 port of glib
allows to place their HANDLEs in a GPollFD.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile.objs       |   6 +-
 aio-posix.c         | 262 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 aio-win32.c         | 209 +++++++++++++++++++++++++++++++++++++++++
 aio.c               | 262 ----------------------------------------------------
 block/Makefile.objs |   6 +-
 main-loop.c         |   2 +-
 6 files changed, 480 insertions(+), 267 deletions(-)
 create mode 100644 aio-posix.c
 create mode 100644 aio-win32.c
 delete mode 100644 aio.c

diff --git a/Makefile.objs b/Makefile.objs
index 98046fc64a..a8ade04c02 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -42,12 +42,12 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
 block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
-block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
+block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o
 block-obj-y += qemu-progress.o qemu-sockets.o uri.o notify.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
-block-obj-$(CONFIG_POSIX) += event_notifier-posix.o
-block-obj-$(CONFIG_WIN32) += event_notifier-win32.o
+block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
+block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += block/
 block-obj-y += $(qapi-obj-y) qapi-types.o qapi-visit.o
diff --git a/aio-posix.c b/aio-posix.c
new file mode 100644
index 0000000000..44247224e2
--- /dev/null
+++ b/aio-posix.c
@@ -0,0 +1,262 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "block.h"
+#include "qemu-queue.h"
+#include "qemu_socket.h"
+
+struct AioHandler
+{
+    GPollFD pfd;
+    IOHandler *io_read;
+    IOHandler *io_write;
+    AioFlushHandler *io_flush;
+    int deleted;
+    void *opaque;
+    QLIST_ENTRY(AioHandler) node;
+};
+
+static AioHandler *find_aio_handler(AioContext *ctx, int fd)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->pfd.fd == fd)
+            if (!node->deleted)
+                return node;
+    }
+
+    return NULL;
+}
+
+void aio_set_fd_handler(AioContext *ctx,
+                        int fd,
+                        IOHandler *io_read,
+                        IOHandler *io_write,
+                        AioFlushHandler *io_flush,
+                        void *opaque)
+{
+    AioHandler *node;
+
+    node = find_aio_handler(ctx, fd);
+
+    /* Are we deleting the fd handler? */
+    if (!io_read && !io_write) {
+        if (node) {
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
+                node->deleted = 1;
+                node->pfd.revents = 0;
+            } else {
+                /* Otherwise, delete it for real.  We can't just mark it as
+                 * deleted because deleted nodes are only cleaned up after
+                 * releasing the walking_handlers lock.
+                 */
+                QLIST_REMOVE(node, node);
+                g_free(node);
+            }
+        }
+    } else {
+        if (node == NULL) {
+            /* Alloc and insert if it's not already there */
+            node = g_malloc0(sizeof(AioHandler));
+            node->pfd.fd = fd;
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+        }
+        /* Update handler with latest information */
+        node->io_read = io_read;
+        node->io_write = io_write;
+        node->io_flush = io_flush;
+        node->opaque = opaque;
+
+        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP : 0);
+        node->pfd.events |= (io_write ? G_IO_OUT : 0);
+    }
+}
+
+void aio_set_event_notifier(AioContext *ctx,
+                            EventNotifier *notifier,
+                            EventNotifierHandler *io_read,
+                            AioFlushEventNotifierHandler *io_flush)
+{
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
+                       (IOHandler *)io_read, NULL,
+                       (AioFlushHandler *)io_flush, notifier);
+}
+
+bool aio_pending(AioContext *ctx)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        int revents;
+
+        /*
+         * FIXME: right now we cannot get G_IO_HUP and G_IO_ERR because
+         * main-loop.c is still select based (due to the slirp legacy).
+         * If main-loop.c ever switches to poll, G_IO_ERR should be
+         * tested too.  Dispatching G_IO_ERR to both handlers should be
+         * okay, since handlers need to be ready for spurious wakeups.
+         */
+        revents = node->pfd.revents & node->pfd.events;
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
+            return true;
+        }
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool aio_poll(AioContext *ctx, bool blocking)
+{
+    static struct timeval tv0;
+    AioHandler *node;
+    fd_set rdfds, wrfds;
+    int max_fd = -1;
+    int ret;
+    bool busy, progress;
+
+    progress = false;
+
+    /*
+     * If there are callbacks left that have been queued, we need to call then.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    /*
+     * Then dispatch any pending callbacks from the GSource.
+     *
+     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * called while we're walking.
+     */
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
+        int revents;
+
+        ctx->walking_handlers++;
+
+        revents = node->pfd.revents & node->pfd.events;
+        node->pfd.revents = 0;
+
+        /* See comment in aio_pending.  */
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
+            node->io_read(node->opaque);
+            progress = true;
+        }
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
+            node->io_write(node->opaque);
+            progress = true;
+        }
+
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
+        }
+    }
+
+    if (progress && !blocking) {
+        return true;
+    }
+
+    ctx->walking_handlers++;
+
+    FD_ZERO(&rdfds);
+    FD_ZERO(&wrfds);
+
+    /* fill fd sets */
+    busy = false;
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        /* If there aren't pending AIO operations, don't invoke callbacks.
+         * Otherwise, if there are no AIO requests, qemu_aio_wait() would
+         * wait indefinitely.
+         */
+        if (!node->deleted && node->io_flush) {
+            if (node->io_flush(node->opaque) == 0) {
+                continue;
+            }
+            busy = true;
+        }
+        if (!node->deleted && node->io_read) {
+            FD_SET(node->pfd.fd, &rdfds);
+            max_fd = MAX(max_fd, node->pfd.fd + 1);
+        }
+        if (!node->deleted && node->io_write) {
+            FD_SET(node->pfd.fd, &wrfds);
+            max_fd = MAX(max_fd, node->pfd.fd + 1);
+        }
+    }
+
+    ctx->walking_handlers--;
+
+    /* No AIO operations?  Get us out of here */
+    if (!busy) {
+        return progress;
+    }
+
+    /* wait until next event */
+    ret = select(max_fd, &rdfds, &wrfds, NULL, blocking ? NULL : &tv0);
+
+    /* if we have any readable fds, dispatch event */
+    if (ret > 0) {
+        /* we have to walk very carefully in case
+         * qemu_aio_set_fd_handler is called while we're walking */
+        node = QLIST_FIRST(&ctx->aio_handlers);
+        while (node) {
+            AioHandler *tmp;
+
+            ctx->walking_handlers++;
+
+            if (!node->deleted &&
+                FD_ISSET(node->pfd.fd, &rdfds) &&
+                node->io_read) {
+                node->io_read(node->opaque);
+                progress = true;
+            }
+            if (!node->deleted &&
+                FD_ISSET(node->pfd.fd, &wrfds) &&
+                node->io_write) {
+                node->io_write(node->opaque);
+                progress = true;
+            }
+
+            tmp = node;
+            node = QLIST_NEXT(node, node);
+
+            ctx->walking_handlers--;
+
+            if (!ctx->walking_handlers && tmp->deleted) {
+                QLIST_REMOVE(tmp, node);
+                g_free(tmp);
+            }
+        }
+    }
+
+    return progress;
+}
diff --git a/aio-win32.c b/aio-win32.c
new file mode 100644
index 0000000000..9881fdbca7
--- /dev/null
+++ b/aio-win32.c
@@ -0,0 +1,209 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM Corp., 2008
+ * Copyright Red Hat Inc., 2012
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paolo Bonzini     <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "block.h"
+#include "qemu-queue.h"
+#include "qemu_socket.h"
+
+struct AioHandler {
+    EventNotifier *e;
+    EventNotifierHandler *io_notify;
+    AioFlushEventNotifierHandler *io_flush;
+    GPollFD pfd;
+    int deleted;
+    QLIST_ENTRY(AioHandler) node;
+};
+
+void aio_set_event_notifier(AioContext *ctx,
+                            EventNotifier *e,
+                            EventNotifierHandler *io_notify,
+                            AioFlushEventNotifierHandler *io_flush)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->e == e && !node->deleted) {
+            break;
+        }
+    }
+
+    /* Are we deleting the fd handler? */
+    if (!io_notify) {
+        if (node) {
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
+                node->deleted = 1;
+                node->pfd.revents = 0;
+            } else {
+                /* Otherwise, delete it for real.  We can't just mark it as
+                 * deleted because deleted nodes are only cleaned up after
+                 * releasing the walking_handlers lock.
+                 */
+                QLIST_REMOVE(node, node);
+                g_free(node);
+            }
+        }
+    } else {
+        if (node == NULL) {
+            /* Alloc and insert if it's not already there */
+            node = g_malloc0(sizeof(AioHandler));
+            node->e = e;
+            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
+            node->pfd.events = G_IO_IN;
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+        }
+        /* Update handler with latest information */
+        node->io_notify = io_notify;
+        node->io_flush = io_flush;
+    }
+}
+
+bool aio_pending(AioContext *ctx)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->pfd.revents && node->io_notify) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool aio_poll(AioContext *ctx, bool blocking)
+{
+    AioHandler *node;
+    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    bool busy, progress;
+    int count;
+
+    progress = false;
+
+    /*
+     * If there are callbacks left that have been queued, we need to call then.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    /*
+     * Then dispatch any pending callbacks from the GSource.
+     *
+     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * called while we're walking.
+     */
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
+
+        ctx->walking_handlers++;
+
+        if (node->pfd.revents && node->io_notify) {
+            node->pfd.revents = 0;
+            node->io_notify(node->e);
+            progress = true;
+        }
+
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
+        }
+    }
+
+    if (progress && !blocking) {
+        return true;
+    }
+
+    ctx->walking_handlers++;
+
+    /* fill fd sets */
+    busy = false;
+    count = 0;
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        /* If there aren't pending AIO operations, don't invoke callbacks.
+         * Otherwise, if there are no AIO requests, qemu_aio_wait() would
+         * wait indefinitely.
+         */
+        if (!node->deleted && node->io_flush) {
+            if (node->io_flush(node->e) == 0) {
+                continue;
+            }
+            busy = true;
+        }
+        if (!node->deleted && node->io_notify) {
+            events[count++] = event_notifier_get_handle(node->e);
+        }
+    }
+
+    ctx->walking_handlers--;
+
+    /* No AIO operations?  Get us out of here */
+    if (!busy) {
+        return progress;
+    }
+
+    /* wait until next event */
+    for (;;) {
+        int timeout = blocking ? INFINITE : 0;
+        int ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+
+        /* if we have any signaled events, dispatch event */
+        if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
+            break;
+        }
+
+        blocking = false;
+
+        /* we have to walk very carefully in case
+         * qemu_aio_set_fd_handler is called while we're walking */
+        node = QLIST_FIRST(&ctx->aio_handlers);
+        while (node) {
+            AioHandler *tmp;
+
+            ctx->walking_handlers++;
+
+            if (!node->deleted &&
+                event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
+                node->io_notify) {
+                node->io_notify(node->e);
+                progress = true;
+            }
+
+            tmp = node;
+            node = QLIST_NEXT(node, node);
+
+            ctx->walking_handlers--;
+
+            if (!ctx->walking_handlers && tmp->deleted) {
+                QLIST_REMOVE(tmp, node);
+                g_free(tmp);
+            }
+        }
+    }
+
+    return progress;
+}
diff --git a/aio.c b/aio.c
deleted file mode 100644
index 44247224e2..0000000000
--- a/aio.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * QEMU aio implementation
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu-common.h"
-#include "block.h"
-#include "qemu-queue.h"
-#include "qemu_socket.h"
-
-struct AioHandler
-{
-    GPollFD pfd;
-    IOHandler *io_read;
-    IOHandler *io_write;
-    AioFlushHandler *io_flush;
-    int deleted;
-    void *opaque;
-    QLIST_ENTRY(AioHandler) node;
-};
-
-static AioHandler *find_aio_handler(AioContext *ctx, int fd)
-{
-    AioHandler *node;
-
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->pfd.fd == fd)
-            if (!node->deleted)
-                return node;
-    }
-
-    return NULL;
-}
-
-void aio_set_fd_handler(AioContext *ctx,
-                        int fd,
-                        IOHandler *io_read,
-                        IOHandler *io_write,
-                        AioFlushHandler *io_flush,
-                        void *opaque)
-{
-    AioHandler *node;
-
-    node = find_aio_handler(ctx, fd);
-
-    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write) {
-        if (node) {
-            /* If the lock is held, just mark the node as deleted */
-            if (ctx->walking_handlers) {
-                node->deleted = 1;
-                node->pfd.revents = 0;
-            } else {
-                /* Otherwise, delete it for real.  We can't just mark it as
-                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
-                 */
-                QLIST_REMOVE(node, node);
-                g_free(node);
-            }
-        }
-    } else {
-        if (node == NULL) {
-            /* Alloc and insert if it's not already there */
-            node = g_malloc0(sizeof(AioHandler));
-            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
-        }
-        /* Update handler with latest information */
-        node->io_read = io_read;
-        node->io_write = io_write;
-        node->io_flush = io_flush;
-        node->opaque = opaque;
-
-        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP : 0);
-        node->pfd.events |= (io_write ? G_IO_OUT : 0);
-    }
-}
-
-void aio_set_event_notifier(AioContext *ctx,
-                            EventNotifier *notifier,
-                            EventNotifierHandler *io_read,
-                            AioFlushEventNotifierHandler *io_flush)
-{
-    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-                       (IOHandler *)io_read, NULL,
-                       (AioFlushHandler *)io_flush, notifier);
-}
-
-bool aio_pending(AioContext *ctx)
-{
-    AioHandler *node;
-
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        int revents;
-
-        /*
-         * FIXME: right now we cannot get G_IO_HUP and G_IO_ERR because
-         * main-loop.c is still select based (due to the slirp legacy).
-         * If main-loop.c ever switches to poll, G_IO_ERR should be
-         * tested too.  Dispatching G_IO_ERR to both handlers should be
-         * okay, since handlers need to be ready for spurious wakeups.
-         */
-        revents = node->pfd.revents & node->pfd.events;
-        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
-            return true;
-        }
-        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-bool aio_poll(AioContext *ctx, bool blocking)
-{
-    static struct timeval tv0;
-    AioHandler *node;
-    fd_set rdfds, wrfds;
-    int max_fd = -1;
-    int ret;
-    bool busy, progress;
-
-    progress = false;
-
-    /*
-     * If there are callbacks left that have been queued, we need to call then.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for qemu_aio_wait loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        blocking = false;
-        progress = true;
-    }
-
-    /*
-     * Then dispatch any pending callbacks from the GSource.
-     *
-     * We have to walk very carefully in case qemu_aio_set_fd_handler is
-     * called while we're walking.
-     */
-    node = QLIST_FIRST(&ctx->aio_handlers);
-    while (node) {
-        AioHandler *tmp;
-        int revents;
-
-        ctx->walking_handlers++;
-
-        revents = node->pfd.revents & node->pfd.events;
-        node->pfd.revents = 0;
-
-        /* See comment in aio_pending.  */
-        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
-            node->io_read(node->opaque);
-            progress = true;
-        }
-        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
-            node->io_write(node->opaque);
-            progress = true;
-        }
-
-        tmp = node;
-        node = QLIST_NEXT(node, node);
-
-        ctx->walking_handlers--;
-
-        if (!ctx->walking_handlers && tmp->deleted) {
-            QLIST_REMOVE(tmp, node);
-            g_free(tmp);
-        }
-    }
-
-    if (progress && !blocking) {
-        return true;
-    }
-
-    ctx->walking_handlers++;
-
-    FD_ZERO(&rdfds);
-    FD_ZERO(&wrfds);
-
-    /* fill fd sets */
-    busy = false;
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        /* If there aren't pending AIO operations, don't invoke callbacks.
-         * Otherwise, if there are no AIO requests, qemu_aio_wait() would
-         * wait indefinitely.
-         */
-        if (!node->deleted && node->io_flush) {
-            if (node->io_flush(node->opaque) == 0) {
-                continue;
-            }
-            busy = true;
-        }
-        if (!node->deleted && node->io_read) {
-            FD_SET(node->pfd.fd, &rdfds);
-            max_fd = MAX(max_fd, node->pfd.fd + 1);
-        }
-        if (!node->deleted && node->io_write) {
-            FD_SET(node->pfd.fd, &wrfds);
-            max_fd = MAX(max_fd, node->pfd.fd + 1);
-        }
-    }
-
-    ctx->walking_handlers--;
-
-    /* No AIO operations?  Get us out of here */
-    if (!busy) {
-        return progress;
-    }
-
-    /* wait until next event */
-    ret = select(max_fd, &rdfds, &wrfds, NULL, blocking ? NULL : &tv0);
-
-    /* if we have any readable fds, dispatch event */
-    if (ret > 0) {
-        /* we have to walk very carefully in case
-         * qemu_aio_set_fd_handler is called while we're walking */
-        node = QLIST_FIRST(&ctx->aio_handlers);
-        while (node) {
-            AioHandler *tmp;
-
-            ctx->walking_handlers++;
-
-            if (!node->deleted &&
-                FD_ISSET(node->pfd.fd, &rdfds) &&
-                node->io_read) {
-                node->io_read(node->opaque);
-                progress = true;
-            }
-            if (!node->deleted &&
-                FD_ISSET(node->pfd.fd, &wrfds) &&
-                node->io_write) {
-                node->io_write(node->opaque);
-                progress = true;
-            }
-
-            tmp = node;
-            node = QLIST_NEXT(node, node);
-
-            ctx->walking_handlers--;
-
-            if (!ctx->walking_handlers && tmp->deleted) {
-                QLIST_REMOVE(tmp, node);
-                g_free(tmp);
-            }
-        }
-    }
-
-    return progress;
-}
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 554f429d05..684765bf63 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -2,13 +2,17 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
-block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-obj-y += parallels.o blkdebug.o blkverify.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
+
+ifeq ($(CONFIG_POSIX),y)
+block-obj-y += nbd.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+endif
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/main-loop.c b/main-loop.c
index 1fdc3bdf2e..a86c275149 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -537,6 +537,7 @@ bool qemu_aio_wait(void)
     return aio_poll(qemu_aio_context, true);
 }
 
+#ifdef CONFIG_POSIX
 void qemu_aio_set_fd_handler(int fd,
                              IOHandler *io_read,
                              IOHandler *io_write,
@@ -549,7 +550,6 @@ void qemu_aio_set_fd_handler(int fd,
     qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
 }
 
-#ifdef CONFIG_POSIX
 void qemu_aio_set_event_notifier(EventNotifier *notifier,
                                  EventNotifierHandler *io_read,
                                  AioFlushEventNotifierHandler *io_flush)
-- 
cgit v1.2.3


From e3713e001fb7d4d82f6de82800c1463e758e4289 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 Sep 2012 14:57:41 +0200
Subject: aio: make AioContexts GSources

This lets AioContexts be used (optionally) with a glib main loop.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 aio-posix.c |  4 ++++
 aio-win32.c |  4 ++++
 async.c     | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 qemu-aio.h  | 23 ++++++++++++++++++++++
 4 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/aio-posix.c b/aio-posix.c
index 44247224e2..65b26073f0 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -56,6 +56,8 @@ void aio_set_fd_handler(AioContext *ctx,
     /* Are we deleting the fd handler? */
     if (!io_read && !io_write) {
         if (node) {
+            g_source_remove_poll(&ctx->source, &node->pfd);
+
             /* If the lock is held, just mark the node as deleted */
             if (ctx->walking_handlers) {
                 node->deleted = 1;
@@ -75,6 +77,8 @@ void aio_set_fd_handler(AioContext *ctx,
             node = g_malloc0(sizeof(AioHandler));
             node->pfd.fd = fd;
             QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+
+            g_source_add_poll(&ctx->source, &node->pfd);
         }
         /* Update handler with latest information */
         node->io_read = io_read;
diff --git a/aio-win32.c b/aio-win32.c
index 9881fdbca7..e460bd848a 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -45,6 +45,8 @@ void aio_set_event_notifier(AioContext *ctx,
     /* Are we deleting the fd handler? */
     if (!io_notify) {
         if (node) {
+            g_source_remove_poll(&ctx->source, &node->pfd);
+
             /* If the lock is held, just mark the node as deleted */
             if (ctx->walking_handlers) {
                 node->deleted = 1;
@@ -66,6 +68,8 @@ void aio_set_event_notifier(AioContext *ctx,
             node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
             node->pfd.events = G_IO_IN;
             QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+
+            g_source_add_poll(&ctx->source, &node->pfd);
         }
         /* Update handler with latest information */
         node->io_notify = io_notify;
diff --git a/async.c b/async.c
index 513bdd7aa2..4ffdd986f1 100644
--- a/async.c
+++ b/async.c
@@ -136,10 +136,73 @@ void aio_bh_update_timeout(AioContext *ctx, uint32_t *timeout)
     }
 }
 
+static gboolean
+aio_ctx_prepare(GSource *source, gint    *timeout)
+{
+    AioContext *ctx = (AioContext *) source;
+    uint32_t wait = -1;
+    aio_bh_update_timeout(ctx, &wait);
+
+    if (wait != -1) {
+        *timeout = MIN(*timeout, wait);
+        return wait == 0;
+    }
+
+    return false;
+}
+
+static gboolean
+aio_ctx_check(GSource *source)
+{
+    AioContext *ctx = (AioContext *) source;
+    QEMUBH *bh;
+
+    for (bh = ctx->first_bh; bh; bh = bh->next) {
+        if (!bh->deleted && bh->scheduled) {
+            return true;
+	}
+    }
+    return aio_pending(ctx);
+}
+
+static gboolean
+aio_ctx_dispatch(GSource     *source,
+                 GSourceFunc  callback,
+                 gpointer     user_data)
+{
+    AioContext *ctx = (AioContext *) source;
+
+    assert(callback == NULL);
+    aio_poll(ctx, false);
+    return true;
+}
+
+static GSourceFuncs aio_source_funcs = {
+    aio_ctx_prepare,
+    aio_ctx_check,
+    aio_ctx_dispatch,
+    NULL
+};
+
+GSource *aio_get_g_source(AioContext *ctx)
+{
+    g_source_ref(&ctx->source);
+    return &ctx->source;
+}
 
 AioContext *aio_context_new(void)
 {
-    return g_new0(AioContext, 1);
+    return (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+}
+
+void aio_context_ref(AioContext *ctx)
+{
+    g_source_ref(&ctx->source);
+}
+
+void aio_context_unref(AioContext *ctx)
+{
+    g_source_unref(&ctx->source);
 }
 
 void aio_flush(AioContext *ctx)
diff --git a/qemu-aio.h b/qemu-aio.h
index ac248962fe..aedf66cfa1 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -44,6 +44,8 @@ typedef void QEMUBHFunc(void *opaque);
 typedef void IOHandler(void *opaque);
 
 typedef struct AioContext {
+    GSource source;
+
     /* The list of registered AIO handlers */
     QLIST_HEAD(, AioHandler) aio_handlers;
 
@@ -74,6 +76,22 @@ typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
  */
 AioContext *aio_context_new(void);
 
+/**
+ * aio_context_ref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Add a reference to an AioContext.
+ */
+void aio_context_ref(AioContext *ctx);
+
+/**
+ * aio_context_unref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Drop a reference to an AioContext.
+ */
+void aio_context_unref(AioContext *ctx);
+
 /**
  * aio_bh_new: Allocate a new bottom half structure.
  *
@@ -188,6 +206,11 @@ void aio_set_event_notifier(AioContext *ctx,
                             EventNotifierHandler *io_read,
                             AioFlushEventNotifierHandler *io_flush);
 
+/* Return a GSource that lets the main loop poll the file descriptors attached
+ * to this AioContext.
+ */
+GSource *aio_get_g_source(AioContext *ctx);
+
 /* Functions to operate on the main QEMU AioContext.  */
 
 void qemu_aio_flush(void);
-- 
cgit v1.2.3


From 2f4dc3c1b2a453a8255d9b97c7cb87860123e495 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 Sep 2012 18:44:14 +0200
Subject: aio: add aio_notify

With this change async.c does not rely anymore on any service from
main-loop.c, i.e. it is completely self-contained.

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 async.c    | 30 ++++++++++++++++++++++++++----
 qemu-aio.h | 18 ++++++++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/async.c b/async.c
index 4ffdd986f1..564526f57f 100644
--- a/async.c
+++ b/async.c
@@ -30,6 +30,7 @@
 /* bottom halves (can be seen as timers which expire ASAP) */
 
 struct QEMUBH {
+    AioContext *ctx;
     QEMUBHFunc *cb;
     void *opaque;
     QEMUBH *next;
@@ -42,6 +43,7 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
     QEMUBH *bh;
     bh = g_malloc0(sizeof(QEMUBH));
+    bh->ctx = ctx;
     bh->cb = cb;
     bh->opaque = opaque;
     bh->next = ctx->first_bh;
@@ -101,8 +103,7 @@ void qemu_bh_schedule(QEMUBH *bh)
         return;
     bh->scheduled = 1;
     bh->idle = 0;
-    /* stop the currently executing CPU to execute the BH ASAP */
-    qemu_notify_event();
+    aio_notify(bh->ctx);
 }
 
 void qemu_bh_cancel(QEMUBH *bh)
@@ -177,11 +178,20 @@ aio_ctx_dispatch(GSource     *source,
     return true;
 }
 
+static void
+aio_ctx_finalize(GSource     *source)
+{
+    AioContext *ctx = (AioContext *) source;
+
+    aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
+    event_notifier_cleanup(&ctx->notifier);
+}
+
 static GSourceFuncs aio_source_funcs = {
     aio_ctx_prepare,
     aio_ctx_check,
     aio_ctx_dispatch,
-    NULL
+    aio_ctx_finalize
 };
 
 GSource *aio_get_g_source(AioContext *ctx)
@@ -190,9 +200,21 @@ GSource *aio_get_g_source(AioContext *ctx)
     return &ctx->source;
 }
 
+void aio_notify(AioContext *ctx)
+{
+    event_notifier_set(&ctx->notifier);
+}
+
 AioContext *aio_context_new(void)
 {
-    return (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+    AioContext *ctx;
+    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+    event_notifier_init(&ctx->notifier, false);
+    aio_set_event_notifier(ctx, &ctx->notifier, 
+                           (EventNotifierHandler *)
+                           event_notifier_test_and_clear, NULL);
+
+    return ctx;
 }
 
 void aio_context_ref(AioContext *ctx)
diff --git a/qemu-aio.h b/qemu-aio.h
index aedf66cfa1..2354617ed1 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -62,6 +62,9 @@ typedef struct AioContext {
      * no callbacks are removed while we're walking and dispatching callbacks.
      */
     int walking_bh;
+
+    /* Used for aio_notify.  */
+    EventNotifier notifier;
 } AioContext;
 
 /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
@@ -101,6 +104,21 @@ void aio_context_unref(AioContext *ctx);
  */
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
 
+/**
+ * aio_notify: Force processing of pending events.
+ *
+ * Similar to signaling a condition variable, aio_notify forces
+ * aio_wait to exit, so that the next call will re-examine pending events.
+ * The caller of aio_notify will usually call aio_wait again very soon,
+ * or go through another iteration of the GLib main loop.  Hence, aio_notify
+ * also has the side effect of recalculating the sets of file descriptors
+ * that the main loop waits for.
+ *
+ * Calling aio_notify is rarely necessary, because for example scheduling
+ * a bottom half calls it already.
+ */
+void aio_notify(AioContext *ctx);
+
 /**
  * aio_bh_poll: Poll bottom halves for an AioContext.
  *
-- 
cgit v1.2.3


From 7ed2b24ce17f8fb7e36e4e8d113f2a30cbea142f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Sep 2012 10:22:39 +0200
Subject: aio: call aio_notify after setting I/O handlers

In the current code, this is done by qemu_set_fd_handler2, which is
called by qemu_aio_set_fd_handler.  We need to keep the same behavior
even after removing the call to qemu_set_fd_handler2.

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 aio-posix.c | 2 ++
 aio-win32.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/aio-posix.c b/aio-posix.c
index 65b26073f0..05cc84e121 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -89,6 +89,8 @@ void aio_set_fd_handler(AioContext *ctx,
         node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP : 0);
         node->pfd.events |= (io_write ? G_IO_OUT : 0);
     }
+
+    aio_notify(ctx);
 }
 
 void aio_set_event_notifier(AioContext *ctx,
diff --git a/aio-win32.c b/aio-win32.c
index e460bd848a..a84eb71246 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -75,6 +75,8 @@ void aio_set_event_notifier(AioContext *ctx,
         node->io_notify = io_notify;
         node->io_flush = io_flush;
     }
+
+    aio_notify(ctx);
 }
 
 bool aio_pending(AioContext *ctx)
-- 
cgit v1.2.3


From 82cbbdc6a0958b49c77639a60906e30d02e6bb7b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 Sep 2012 15:07:08 +0200
Subject: main-loop: use GSource to poll AIO file descriptors

This lets us remove the hooks for the main loop in async.c.

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 main-loop.c | 23 ++++++-----------------
 main-loop.h |  1 -
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/main-loop.c b/main-loop.c
index a86c275149..365c9d3261 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -205,6 +205,7 @@ static AioContext *qemu_aio_context;
 int qemu_init_main_loop(void)
 {
     int ret;
+    GSource *src;
 
     init_clocks();
     init_timer_alarm();
@@ -222,6 +223,9 @@ int qemu_init_main_loop(void)
     }
 
     qemu_aio_context = aio_context_new();
+    src = aio_get_g_source(qemu_aio_context);
+    g_source_attach(src, NULL);
+    g_source_unref(src);
     return 0;
 }
 
@@ -484,8 +488,6 @@ int main_loop_wait(int nonblocking)
 
     if (nonblocking) {
         timeout = 0;
-    } else {
-        aio_bh_update_timeout(qemu_aio_context, &timeout);
     }
 
     /* poll any events */
@@ -508,10 +510,6 @@ int main_loop_wait(int nonblocking)
 
     qemu_run_all_timers();
 
-    /* Check bottom-halves last in case any of the earlier events triggered
-       them.  */
-    qemu_bh_poll();
-
     return ret;
 }
 
@@ -522,11 +520,6 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
     return aio_bh_new(qemu_aio_context, cb, opaque);
 }
 
-int qemu_bh_poll(void)
-{
-    return aio_bh_poll(qemu_aio_context);
-}
-
 void qemu_aio_flush(void)
 {
     aio_flush(qemu_aio_context);
@@ -546,16 +539,12 @@ void qemu_aio_set_fd_handler(int fd,
 {
     aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, io_flush,
                        opaque);
-
-    qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
 }
+#endif
 
 void qemu_aio_set_event_notifier(EventNotifier *notifier,
                                  EventNotifierHandler *io_read,
                                  AioFlushEventNotifierHandler *io_flush)
 {
-    qemu_aio_set_fd_handler(event_notifier_get_fd(notifier),
-                            (IOHandler *)io_read, NULL,
-                            (AioFlushHandler *)io_flush, notifier);
+    aio_set_event_notifier(qemu_aio_context, notifier, io_read, io_flush);
 }
-#endif
diff --git a/main-loop.h b/main-loop.h
index 1d1a56b858..326c74269c 100644
--- a/main-loop.h
+++ b/main-loop.h
@@ -302,6 +302,5 @@ void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int rc
 
 QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
 void qemu_bh_schedule_idle(QEMUBH *bh);
-int qemu_bh_poll(void);
 
 #endif
-- 
cgit v1.2.3


From 4c8d0d27676778febad3802a95218d5ceaca171e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 May 2010 17:27:14 +0200
Subject: main-loop: use aio_notify for qemu_notify_event

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 main-loop.c | 106 +++++-------------------------------------------------------
 1 file changed, 8 insertions(+), 98 deletions(-)

diff --git a/main-loop.c b/main-loop.c
index 365c9d3261..e43c7c8e8d 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -32,70 +32,6 @@
 
 #include "compatfd.h"
 
-static int io_thread_fd = -1;
-
-void qemu_notify_event(void)
-{
-    /* Write 8 bytes to be compatible with eventfd.  */
-    static const uint64_t val = 1;
-    ssize_t ret;
-
-    if (io_thread_fd == -1) {
-        return;
-    }
-    do {
-        ret = write(io_thread_fd, &val, sizeof(val));
-    } while (ret < 0 && errno == EINTR);
-
-    /* EAGAIN is fine, a read must be pending.  */
-    if (ret < 0 && errno != EAGAIN) {
-        fprintf(stderr, "qemu_notify_event: write() failed: %s\n",
-                strerror(errno));
-        exit(1);
-    }
-}
-
-static void qemu_event_read(void *opaque)
-{
-    int fd = (intptr_t)opaque;
-    ssize_t len;
-    char buffer[512];
-
-    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */
-    do {
-        len = read(fd, buffer, sizeof(buffer));
-    } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
-}
-
-static int qemu_event_init(void)
-{
-    int err;
-    int fds[2];
-
-    err = qemu_eventfd(fds);
-    if (err == -1) {
-        return -errno;
-    }
-    err = fcntl_setfl(fds[0], O_NONBLOCK);
-    if (err < 0) {
-        goto fail;
-    }
-    err = fcntl_setfl(fds[1], O_NONBLOCK);
-    if (err < 0) {
-        goto fail;
-    }
-    qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
-                         (void *)(intptr_t)fds[0]);
-
-    io_thread_fd = fds[1];
-    return 0;
-
-fail:
-    close(fds[0]);
-    close(fds[1]);
-    return err;
-}
-
 /* If we have signalfd, we mask out the signals we want to handle and then
  * use signalfd to listen for them.  We rely on whatever the current signal
  * handler is to dispatch the signals when we receive them.
@@ -165,43 +101,22 @@ static int qemu_signal_init(void)
 
 #else /* _WIN32 */
 
-static HANDLE qemu_event_handle = NULL;
-
-static void dummy_event_handler(void *opaque)
-{
-}
-
-static int qemu_event_init(void)
+static int qemu_signal_init(void)
 {
-    qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
-    if (!qemu_event_handle) {
-        fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
-        return -1;
-    }
-    qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
     return 0;
 }
+#endif
+
+static AioContext *qemu_aio_context;
 
 void qemu_notify_event(void)
 {
-    if (!qemu_event_handle) {
+    if (!qemu_aio_context) {
         return;
     }
-    if (!SetEvent(qemu_event_handle)) {
-        fprintf(stderr, "qemu_notify_event: SetEvent failed: %ld\n",
-                GetLastError());
-        exit(1);
-    }
+    aio_notify(qemu_aio_context);
 }
 
-static int qemu_signal_init(void)
-{
-    return 0;
-}
-#endif
-
-static AioContext *qemu_aio_context;
-
 int qemu_init_main_loop(void)
 {
     int ret;
@@ -216,12 +131,6 @@ int qemu_init_main_loop(void)
         return ret;
     }
 
-    /* Note eventfd must be drained before signalfd handlers run */
-    ret = qemu_event_init();
-    if (ret) {
-        return ret;
-    }
-
     qemu_aio_context = aio_context_new();
     src = aio_get_g_source(qemu_aio_context);
     g_source_attach(src, NULL);
@@ -411,7 +320,8 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
 
 void qemu_fd_register(int fd)
 {
-    WSAEventSelect(fd, qemu_event_handle, FD_READ | FD_ACCEPT | FD_CLOSE |
+    WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
+                   FD_READ | FD_ACCEPT | FD_CLOSE |
                    FD_CONNECT | FD_WRITE | FD_OOB);
 }
 
-- 
cgit v1.2.3


From 22bfa75eafc21522afbb265091faa9cc0649e9fb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 Sep 2012 15:11:48 +0200
Subject: aio: clean up now-unused functions

Some cleanups can now be made, now that the main loop does not anymore need
hooks into the bottom half code.

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 async.c       | 23 +++++++----------------
 oslib-posix.c | 31 -------------------------------
 qemu-aio.h    |  1 -
 qemu-common.h |  1 -
 4 files changed, 7 insertions(+), 49 deletions(-)

diff --git a/async.c b/async.c
index 564526f57f..04f9dcbb4d 100644
--- a/async.c
+++ b/async.c
@@ -117,16 +117,20 @@ void qemu_bh_delete(QEMUBH *bh)
     bh->deleted = 1;
 }
 
-void aio_bh_update_timeout(AioContext *ctx, uint32_t *timeout)
+static gboolean
+aio_ctx_prepare(GSource *source, gint    *timeout)
 {
+    AioContext *ctx = (AioContext *) source;
     QEMUBH *bh;
+    bool scheduled = false;
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
+            scheduled = true;
             if (bh->idle) {
                 /* idle bottom halves will be polled at least
                  * every 10ms */
-                *timeout = MIN(10, *timeout);
+                *timeout = 10;
             } else {
                 /* non-idle bottom halves will be executed
                  * immediately */
@@ -135,21 +139,8 @@ void aio_bh_update_timeout(AioContext *ctx, uint32_t *timeout)
             }
         }
     }
-}
-
-static gboolean
-aio_ctx_prepare(GSource *source, gint    *timeout)
-{
-    AioContext *ctx = (AioContext *) source;
-    uint32_t wait = -1;
-    aio_bh_update_timeout(ctx, &wait);
-
-    if (wait != -1) {
-        *timeout = MIN(*timeout, wait);
-        return wait == 0;
-    }
 
-    return false;
+    return scheduled;
 }
 
 static gboolean
diff --git a/oslib-posix.c b/oslib-posix.c
index dbeb6272b8..9db9c3d8af 100644
--- a/oslib-posix.c
+++ b/oslib-posix.c
@@ -61,9 +61,6 @@ static int running_on_valgrind = -1;
 #ifdef CONFIG_LINUX
 #include <sys/syscall.h>
 #endif
-#ifdef CONFIG_EVENTFD
-#include <sys/eventfd.h>
-#endif
 
 int qemu_get_thread_id(void)
 {
@@ -183,34 +180,6 @@ int qemu_pipe(int pipefd[2])
     return ret;
 }
 
-/*
- * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
- */
-int qemu_eventfd(int fds[2])
-{
-#ifdef CONFIG_EVENTFD
-    int ret;
-
-    ret = eventfd(0, 0);
-    if (ret >= 0) {
-        fds[0] = ret;
-        fds[1] = dup(ret);
-        if (fds[1] == -1) {
-            close(ret);
-            return -1;
-        }
-        qemu_set_cloexec(ret);
-        qemu_set_cloexec(fds[1]);
-        return 0;
-    }
-    if (errno != ENOSYS) {
-        return -1;
-    }
-#endif
-
-    return qemu_pipe(fds);
-}
-
 int qemu_utimens(const char *path, const struct timespec *times)
 {
     struct timeval tv[2], tv_now;
diff --git a/qemu-aio.h b/qemu-aio.h
index 2354617ed1..1b7eb6ef5b 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -125,7 +125,6 @@ void aio_notify(AioContext *ctx);
  * These are internal functions used by the QEMU main loop.
  */
 int aio_bh_poll(AioContext *ctx);
-void aio_bh_update_timeout(AioContext *ctx, uint32_t *timeout);
 
 /**
  * qemu_bh_schedule: Schedule a bottom half.
diff --git a/qemu-common.h b/qemu-common.h
index 24e13ccb5e..c3328d230a 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -218,7 +218,6 @@ ssize_t qemu_recv_full(int fd, void *buf, size_t count, int flags)
     QEMU_WARN_UNUSED_RESULT;
 
 #ifndef _WIN32
-int qemu_eventfd(int pipefd[2]);
 int qemu_pipe(int pipefd[2]);
 #endif
 
-- 
cgit v1.2.3


From c90caf25e2b6945ae13560476a5ecd7992e9f945 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 24 Feb 2012 08:39:02 +0100
Subject: linux-aio: use event notifiers

Since linux-aio already uses an eventfd, converting it to use the
EventNotifier-based API simplifies the code even though it is not
meant to be portable.

Reviewed-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 linux-aio.c | 49 +++++++++++++++++++------------------------------
 1 file changed, 19 insertions(+), 30 deletions(-)

diff --git a/linux-aio.c b/linux-aio.c
index f3d8ef33ca..d1afb460c2 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -11,8 +11,8 @@
 #include "qemu-aio.h"
 #include "qemu-queue.h"
 #include "block/raw-posix-aio.h"
+#include "event_notifier.h"
 
-#include <sys/eventfd.h>
 #include <libaio.h>
 
 /*
@@ -38,7 +38,7 @@ struct qemu_laiocb {
 
 struct qemu_laio_state {
     io_context_t ctx;
-    int efd;
+    EventNotifier e;
     int count;
 };
 
@@ -77,29 +77,17 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
     qemu_aio_release(laiocb);
 }
 
-static void qemu_laio_completion_cb(void *opaque)
+static void qemu_laio_completion_cb(EventNotifier *e)
 {
-    struct qemu_laio_state *s = opaque;
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
 
-    while (1) {
+    while (event_notifier_test_and_clear(&s->e)) {
         struct io_event events[MAX_EVENTS];
-        uint64_t val;
-        ssize_t ret;
         struct timespec ts = { 0 };
         int nevents, i;
 
         do {
-            ret = read(s->efd, &val, sizeof(val));
-        } while (ret == -1 && errno == EINTR);
-
-        if (ret == -1 && errno == EAGAIN)
-            break;
-
-        if (ret != 8)
-            break;
-
-        do {
-            nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts);
+            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
         } while (nevents == -EINTR);
 
         for (i = 0; i < nevents; i++) {
@@ -113,9 +101,9 @@ static void qemu_laio_completion_cb(void *opaque)
     }
 }
 
-static int qemu_laio_flush_cb(void *opaque)
+static int qemu_laio_flush_cb(EventNotifier *e)
 {
-    struct qemu_laio_state *s = opaque;
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
 
     return (s->count > 0) ? 1 : 0;
 }
@@ -147,8 +135,9 @@ static void laio_cancel(BlockDriverAIOCB *blockacb)
      * We might be able to do this slightly more optimal by removing the
      * O_NONBLOCK flag.
      */
-    while (laiocb->ret == -EINPROGRESS)
-        qemu_laio_completion_cb(laiocb->ctx);
+    while (laiocb->ret == -EINPROGRESS) {
+        qemu_laio_completion_cb(&laiocb->ctx->e);
+    }
 }
 
 static AIOPool laio_pool = {
@@ -187,7 +176,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
                         __func__, type);
         goto out_free_aiocb;
     }
-    io_set_eventfd(&laiocb->iocb, s->efd);
+    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
     s->count++;
 
     if (io_submit(s->ctx, 1, &iocbs) < 0)
@@ -206,21 +195,21 @@ void *laio_init(void)
     struct qemu_laio_state *s;
 
     s = g_malloc0(sizeof(*s));
-    s->efd = eventfd(0, 0);
-    if (s->efd == -1)
+    if (event_notifier_init(&s->e, false) < 0) {
         goto out_free_state;
-    fcntl(s->efd, F_SETFL, O_NONBLOCK);
+    }
 
-    if (io_setup(MAX_EVENTS, &s->ctx) != 0)
+    if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
         goto out_close_efd;
+    }
 
-    qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
-        qemu_laio_flush_cb, s);
+    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
+                                qemu_laio_flush_cb);
 
     return s;
 
 out_close_efd:
-    close(s->efd);
+    event_notifier_cleanup(&s->e);
 out_free_state:
     g_free(s);
     return NULL;
-- 
cgit v1.2.3


From 38b14db34e16bb0ae1f28b7ddccb6aa11a2a96a1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 8 Aug 2011 14:36:41 +0200
Subject: qemu-thread: add QemuSemaphore

The new thread pool will use semaphores instead of condition
variables, because QemuCond does not have qemu_cond_timedwait.
(I also like it more this way).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-thread-posix.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-thread-posix.h |  5 ++++
 qemu-thread-win32.c | 35 +++++++++++++++++++++++
 qemu-thread-win32.h |  4 +++
 qemu-thread.h       |  7 +++++
 5 files changed, 131 insertions(+)

diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c
index 8fbabdac36..6a3d3a12a8 100644
--- a/qemu-thread-posix.c
+++ b/qemu-thread-posix.c
@@ -17,6 +17,9 @@
 #include <signal.h>
 #include <stdint.h>
 #include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/time.h>
 #include "qemu-thread.h"
 
 static void error_exit(int err, const char *msg)
@@ -115,6 +118,83 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
         error_exit(err, __func__);
 }
 
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+    int rc;
+
+    rc = sem_init(&sem->sem, 0, init);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+    int rc;
+
+    rc = sem_destroy(&sem->sem);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+    int rc;
+
+    rc = sem_post(&sem->sem);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+    int rc;
+
+    if (ms <= 0) {
+        /* This is cheaper than sem_timedwait.  */
+        do {
+            rc = sem_trywait(&sem->sem);
+        } while (rc == -1 && errno == EINTR);
+        if (rc == -1 && errno == EAGAIN) {
+            return -1;
+        }
+    } else {
+        struct timeval tv;
+        struct timespec ts;
+        gettimeofday(&tv, NULL);
+        ts.tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
+        ts.tv_sec = tv.tv_sec + ms / 1000;
+        if (ts.tv_nsec >= 1000000000) {
+            ts.tv_sec++;
+            ts.tv_nsec -= 1000000000;
+        }
+        do {
+            rc = sem_timedwait(&sem->sem, &ts);
+        } while (rc == -1 && errno == EINTR);
+        if (rc == -1 && errno == ETIMEDOUT) {
+            return -1;
+        }
+    }
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+    return 0;
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+    int rc;
+
+    do {
+        rc = sem_wait(&sem->sem);
+    } while (rc == -1 && errno == EINTR);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+}
+
 void qemu_thread_create(QemuThread *thread,
                        void *(*start_routine)(void*),
                        void *arg, int mode)
diff --git a/qemu-thread-posix.h b/qemu-thread-posix.h
index ee4618e620..2542c15200 100644
--- a/qemu-thread-posix.h
+++ b/qemu-thread-posix.h
@@ -1,6 +1,7 @@
 #ifndef __QEMU_THREAD_POSIX_H
 #define __QEMU_THREAD_POSIX_H 1
 #include "pthread.h"
+#include <semaphore.h>
 
 struct QemuMutex {
     pthread_mutex_t lock;
@@ -10,6 +11,10 @@ struct QemuCond {
     pthread_cond_t cond;
 };
 
+struct QemuSemaphore {
+    sem_t sem;
+};
+
 struct QemuThread {
     pthread_t thread;
 };
diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c
index 177b398cc4..4b3db60f5c 100644
--- a/qemu-thread-win32.c
+++ b/qemu-thread-win32.c
@@ -192,6 +192,41 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
     qemu_mutex_lock(mutex);
 }
 
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+    /* Manual reset.  */
+    sem->sema = CreateSemaphore(NULL, init, LONG_MAX, NULL);
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+    CloseHandle(sem->sema);
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+    ReleaseSemaphore(sem->sema, 1, NULL);
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+    int rc = WaitForSingleObject(sem->sema, ms);
+    if (rc == WAIT_OBJECT_0) {
+        return 0;
+    }
+    if (rc != WAIT_TIMEOUT) {
+        error_exit(GetLastError(), __func__);
+    }
+    return -1;
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+    if (WaitForSingleObject(sem->sema, INFINITE) != WAIT_OBJECT_0) {
+        error_exit(GetLastError(), __func__);
+    }
+}
+
 struct QemuThreadData {
     /* Passed to win32_start_routine.  */
     void             *(*start_routine)(void *);
diff --git a/qemu-thread-win32.h b/qemu-thread-win32.h
index b9d1be8478..13adb958f0 100644
--- a/qemu-thread-win32.h
+++ b/qemu-thread-win32.h
@@ -13,6 +13,10 @@ struct QemuCond {
     HANDLE continue_event;
 };
 
+struct QemuSemaphore {
+    HANDLE sema;
+};
+
 typedef struct QemuThreadData QemuThreadData;
 struct QemuThread {
     QemuThreadData *data;
diff --git a/qemu-thread.h b/qemu-thread.h
index 05fdaaf50e..3ee2f6b1f9 100644
--- a/qemu-thread.h
+++ b/qemu-thread.h
@@ -6,6 +6,7 @@
 
 typedef struct QemuMutex QemuMutex;
 typedef struct QemuCond QemuCond;
+typedef struct QemuSemaphore QemuSemaphore;
 typedef struct QemuThread QemuThread;
 
 #ifdef _WIN32
@@ -38,6 +39,12 @@ void qemu_cond_signal(QemuCond *cond);
 void qemu_cond_broadcast(QemuCond *cond);
 void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex);
 
+void qemu_sem_init(QemuSemaphore *sem, int init);
+void qemu_sem_post(QemuSemaphore *sem);
+void qemu_sem_wait(QemuSemaphore *sem);
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
+void qemu_sem_destroy(QemuSemaphore *sem);
+
 void qemu_thread_create(QemuThread *thread,
                         void *(*start_routine)(void *),
                         void *arg, int mode);
-- 
cgit v1.2.3


From d354c7eccf5466ec2715a03d3f33dbfd6680dcc5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 23 Feb 2012 13:23:34 +0100
Subject: aio: add generic thread-pool facility

Add a generic thread-pool.  The code is roughly based on posix-aio-compat.c,
with some changes, especially the following:

- use QemuSemaphore instead of QemuCond;

- separate the state of the thread from the return code of the worker
function.  The return code is totally opaque for the thread pool;

- do not busy wait when doing cancellation.

A more generic threadpool (but still specific to I/O so that in the future
it can use special scheduling classes or PI mutexes) can have many uses:
it allows more flexibility in raw-posix.c and can more easily be extended
to Win32, and it will also be used to do an msync of the persistent bitmap.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile.objs |   2 +-
 thread-pool.c | 282 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 thread-pool.h |  34 +++++++
 trace-events  |   5 ++
 4 files changed, 322 insertions(+), 1 deletion(-)
 create mode 100644 thread-pool.c
 create mode 100644 thread-pool.h

diff --git a/Makefile.objs b/Makefile.objs
index a8ade04c02..f8ae0316b8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -43,7 +43,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 
 block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
 block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o
-block-obj-y += qemu-progress.o qemu-sockets.o uri.o notify.o
+block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
diff --git a/thread-pool.c b/thread-pool.c
new file mode 100644
index 0000000000..80749b77e0
--- /dev/null
+++ b/thread-pool.c
@@ -0,0 +1,282 @@
+/*
+ * QEMU block layer thread pool
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paolo Bonzini     <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "qemu-thread.h"
+#include "osdep.h"
+#include "qemu-coroutine.h"
+#include "trace.h"
+#include "block_int.h"
+#include "event_notifier.h"
+#include "thread-pool.h"
+
+static void do_spawn_thread(void);
+
+typedef struct ThreadPoolElement ThreadPoolElement;
+
+enum ThreadState {
+    THREAD_QUEUED,
+    THREAD_ACTIVE,
+    THREAD_DONE,
+    THREAD_CANCELED,
+};
+
+struct ThreadPoolElement {
+    BlockDriverAIOCB common;
+    ThreadPoolFunc *func;
+    void *arg;
+    enum ThreadState state;
+    int ret;
+
+    /* Access to this list is protected by lock.  */
+    QTAILQ_ENTRY(ThreadPoolElement) reqs;
+
+    /* Access to this list is protected by the global mutex.  */
+    QLIST_ENTRY(ThreadPoolElement) all;
+};
+
+static EventNotifier notifier;
+static QemuMutex lock;
+static QemuCond check_cancel;
+static QemuSemaphore sem;
+static int max_threads = 64;
+static QEMUBH *new_thread_bh;
+
+/* The following variables are protected by the global mutex.  */
+static QLIST_HEAD(, ThreadPoolElement) head;
+
+/* The following variables are protected by lock.  */
+static QTAILQ_HEAD(, ThreadPoolElement) request_list;
+static int cur_threads;
+static int idle_threads;
+static int new_threads;     /* backlog of threads we need to create */
+static int pending_threads; /* threads created but not running yet */
+static int pending_cancellations; /* whether we need a cond_broadcast */
+
+static void *worker_thread(void *unused)
+{
+    qemu_mutex_lock(&lock);
+    pending_threads--;
+    do_spawn_thread();
+
+    while (1) {
+        ThreadPoolElement *req;
+        int ret;
+
+        do {
+            idle_threads++;
+            qemu_mutex_unlock(&lock);
+            ret = qemu_sem_timedwait(&sem, 10000);
+            qemu_mutex_lock(&lock);
+            idle_threads--;
+        } while (ret == -1 && !QTAILQ_EMPTY(&request_list));
+        if (ret == -1) {
+            break;
+        }
+
+        req = QTAILQ_FIRST(&request_list);
+        QTAILQ_REMOVE(&request_list, req, reqs);
+        req->state = THREAD_ACTIVE;
+        qemu_mutex_unlock(&lock);
+
+        ret = req->func(req->arg);
+
+        qemu_mutex_lock(&lock);
+        req->state = THREAD_DONE;
+        req->ret = ret;
+        if (pending_cancellations) {
+            qemu_cond_broadcast(&check_cancel);
+        }
+
+        event_notifier_set(&notifier);
+    }
+
+    cur_threads--;
+    qemu_mutex_unlock(&lock);
+    return NULL;
+}
+
+static void do_spawn_thread(void)
+{
+    QemuThread t;
+
+    /* Runs with lock taken.  */
+    if (!new_threads) {
+        return;
+    }
+
+    new_threads--;
+    pending_threads++;
+
+    qemu_thread_create(&t, worker_thread, NULL, QEMU_THREAD_DETACHED);
+}
+
+static void spawn_thread_bh_fn(void *opaque)
+{
+    qemu_mutex_lock(&lock);
+    do_spawn_thread();
+    qemu_mutex_unlock(&lock);
+}
+
+static void spawn_thread(void)
+{
+    cur_threads++;
+    new_threads++;
+    /* If there are threads being created, they will spawn new workers, so
+     * we don't spend time creating many threads in a loop holding a mutex or
+     * starving the current vcpu.
+     *
+     * If there are no idle threads, ask the main thread to create one, so we
+     * inherit the correct affinity instead of the vcpu affinity.
+     */
+    if (!pending_threads) {
+        qemu_bh_schedule(new_thread_bh);
+    }
+}
+
+static void event_notifier_ready(EventNotifier *notifier)
+{
+    ThreadPoolElement *elem, *next;
+
+    event_notifier_test_and_clear(notifier);
+restart:
+    QLIST_FOREACH_SAFE(elem, &head, all, next) {
+        if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
+            continue;
+        }
+        if (elem->state == THREAD_DONE) {
+            trace_thread_pool_complete(elem, elem->common.opaque, elem->ret);
+        }
+        if (elem->state == THREAD_DONE && elem->common.cb) {
+            qemu_mutex_lock(&lock);
+            int ret = elem->ret;
+            qemu_mutex_unlock(&lock);
+            QLIST_REMOVE(elem, all);
+            elem->common.cb(elem->common.opaque, ret);
+            qemu_aio_release(elem);
+            goto restart;
+        } else {
+            /* remove the request */
+            QLIST_REMOVE(elem, all);
+            qemu_aio_release(elem);
+        }
+    }
+}
+
+static int thread_pool_active(EventNotifier *notifier)
+{
+    return !QLIST_EMPTY(&head);
+}
+
+static void thread_pool_cancel(BlockDriverAIOCB *acb)
+{
+    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
+
+    trace_thread_pool_cancel(elem, elem->common.opaque);
+
+    qemu_mutex_lock(&lock);
+    if (elem->state == THREAD_QUEUED &&
+        /* No thread has yet started working on elem. we can try to "steal"
+         * the item from the worker if we can get a signal from the
+         * semaphore.  Because this is non-blocking, we can do it with
+         * the lock taken and ensure that elem will remain THREAD_QUEUED.
+         */
+        qemu_sem_timedwait(&sem, 0) == 0) {
+        QTAILQ_REMOVE(&request_list, elem, reqs);
+        elem->state = THREAD_CANCELED;
+        event_notifier_set(&notifier);
+    } else {
+        pending_cancellations++;
+        while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
+            qemu_cond_wait(&check_cancel, &lock);
+        }
+        pending_cancellations--;
+    }
+    qemu_mutex_unlock(&lock);
+}
+
+static AIOPool thread_pool_cb_pool = {
+    .aiocb_size         = sizeof(ThreadPoolElement),
+    .cancel             = thread_pool_cancel,
+};
+
+BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    ThreadPoolElement *req;
+
+    req = qemu_aio_get(&thread_pool_cb_pool, NULL, cb, opaque);
+    req->func = func;
+    req->arg = arg;
+    req->state = THREAD_QUEUED;
+
+    QLIST_INSERT_HEAD(&head, req, all);
+
+    trace_thread_pool_submit(req, arg);
+
+    qemu_mutex_lock(&lock);
+    if (idle_threads == 0 && cur_threads < max_threads) {
+        spawn_thread();
+    }
+    QTAILQ_INSERT_TAIL(&request_list, req, reqs);
+    qemu_mutex_unlock(&lock);
+    qemu_sem_post(&sem);
+    return &req->common;
+}
+
+typedef struct ThreadPoolCo {
+    Coroutine *co;
+    int ret;
+} ThreadPoolCo;
+
+static void thread_pool_co_cb(void *opaque, int ret)
+{
+    ThreadPoolCo *co = opaque;
+
+    co->ret = ret;
+    qemu_coroutine_enter(co->co, NULL);
+}
+
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
+{
+    ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
+    assert(qemu_in_coroutine());
+    thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
+    qemu_coroutine_yield();
+    return tpc.ret;
+}
+
+void thread_pool_submit(ThreadPoolFunc *func, void *arg)
+{
+    thread_pool_submit_aio(func, arg, NULL, NULL);
+}
+
+static void thread_pool_init(void)
+{
+    QLIST_INIT(&head);
+    event_notifier_init(&notifier, false);
+    qemu_mutex_init(&lock);
+    qemu_cond_init(&check_cancel);
+    qemu_sem_init(&sem, 0);
+    qemu_aio_set_event_notifier(&notifier, event_notifier_ready,
+                                thread_pool_active);
+
+    QTAILQ_INIT(&request_list);
+    new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL);
+}
+
+block_init(thread_pool_init)
diff --git a/thread-pool.h b/thread-pool.h
new file mode 100644
index 0000000000..378a4ac9f9
--- /dev/null
+++ b/thread-pool.h
@@ -0,0 +1,34 @@
+/*
+ * QEMU block layer thread pool
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paolo Bonzini     <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef QEMU_THREAD_POOL_H
+#define QEMU_THREAD_POOL_H 1
+
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "qemu-thread.h"
+#include "qemu-coroutine.h"
+#include "block_int.h"
+
+typedef int ThreadPoolFunc(void *opaque);
+
+BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+     BlockDriverCompletionFunc *cb, void *opaque);
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
+void thread_pool_submit(ThreadPoolFunc *func, void *arg);
+
+#endif
diff --git a/trace-events b/trace-events
index e2d4580d4c..58c18ebb6c 100644
--- a/trace-events
+++ b/trace-events
@@ -90,6 +90,11 @@ virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
 virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 
+# thread-pool.c
+thread_pool_submit(void *req, void *opaque) "req %p opaque %p"
+thread_pool_complete(void *req, void *opaque, int ret) "req %p opaque %p ret %d"
+thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
+
 # posix-aio-compat.c
 paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
 paio_complete(void *acb, void *opaque, int ret) "acb %p opaque %p ret %d"
-- 
cgit v1.2.3


From 19d092cf9ba3c01b0e22ef65c499ae7ddc28d0e8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 31 Oct 2012 10:09:11 +0100
Subject: threadpool: do not take lock in event_notifier_ready

The ordering is:

    worker thread                         consumer thread
    -------------------------------------------------------------------
    write ret                             event_notifier_test_and_clear
    wmb()                                 read state
    write state                           rmb()
    event_notifier_set                    read ret

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 thread-pool.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/thread-pool.c b/thread-pool.c
index 80749b77e0..651b32419b 100644
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -39,6 +39,11 @@ struct ThreadPoolElement {
     BlockDriverAIOCB common;
     ThreadPoolFunc *func;
     void *arg;
+
+    /* Moving state out of THREAD_QUEUED is protected by lock.  After
+     * that, only the worker thread can write to it.  Reads and writes
+     * of state and ret are ordered with memory barriers.
+     */
     enum ThreadState state;
     int ret;
 
@@ -95,9 +100,12 @@ static void *worker_thread(void *unused)
 
         ret = req->func(req->arg);
 
-        qemu_mutex_lock(&lock);
-        req->state = THREAD_DONE;
         req->ret = ret;
+        /* Write ret before state.  */
+        smp_wmb();
+        req->state = THREAD_DONE;
+
+        qemu_mutex_lock(&lock);
         if (pending_cancellations) {
             qemu_cond_broadcast(&check_cancel);
         }
@@ -162,11 +170,10 @@ restart:
             trace_thread_pool_complete(elem, elem->common.opaque, elem->ret);
         }
         if (elem->state == THREAD_DONE && elem->common.cb) {
-            qemu_mutex_lock(&lock);
-            int ret = elem->ret;
-            qemu_mutex_unlock(&lock);
             QLIST_REMOVE(elem, all);
-            elem->common.cb(elem->common.opaque, ret);
+            /* Read state before ret.  */
+            smp_rmb();
+            elem->common.cb(elem->common.opaque, elem->ret);
             qemu_aio_release(elem);
             goto restart;
         } else {
-- 
cgit v1.2.3


From 47e6b251a5e9a47c406f2f2c0b01bb88854c98ec Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 24 May 2012 18:03:13 +0200
Subject: block: switch posix-aio-compat to threadpool

This is not meant for portability, but to remove code duplication.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-posix-aio.h |   1 -
 block/raw-posix.c     |  12 +-
 posix-aio-compat.c    | 427 +++++---------------------------------------------
 3 files changed, 41 insertions(+), 399 deletions(-)

diff --git a/block/raw-posix-aio.h b/block/raw-posix-aio.h
index ba118f616b..6725135dd4 100644
--- a/block/raw-posix-aio.h
+++ b/block/raw-posix-aio.h
@@ -28,7 +28,6 @@
 
 
 /* posix-aio-compat.c - thread pool based implementation */
-int paio_init(void);
 BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 28d439fa81..9ae2c505a8 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -266,14 +266,10 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
     }
     s->fd = fd;
 
-    /* We're falling back to POSIX AIO in some cases so init always */
-    if (paio_init() < 0) {
-        goto out_close;
-    }
-
 #ifdef CONFIG_LINUX_AIO
     if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
-        goto out_close;
+        qemu_close(fd);
+        return -errno;
     }
 #endif
 
@@ -284,10 +280,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
 #endif
 
     return 0;
-
-out_close:
-    qemu_close(fd);
-    return -errno;
 }
 
 static int raw_open(BlockDriverState *bs, const char *filename, int flags)
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 96e4daf505..4a1e3d3662 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -28,13 +28,12 @@
 #include "sysemu.h"
 #include "qemu-common.h"
 #include "trace.h"
+#include "thread-pool.h"
 #include "block_int.h"
 #include "iov.h"
 
 #include "block/raw-posix-aio.h"
 
-static void do_spawn_thread(void);
-
 struct qemu_paiocb {
     BlockDriverAIOCB common;
     int aio_fildes;
@@ -46,82 +45,15 @@ struct qemu_paiocb {
     size_t aio_nbytes;
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
     off_t aio_offset;
-
-    QTAILQ_ENTRY(qemu_paiocb) node;
     int aio_type;
-    ssize_t ret;
-    int active;
-    struct qemu_paiocb *next;
 };
 
-typedef struct PosixAioState {
-    int rfd, wfd;
-    struct qemu_paiocb *first_aio;
-} PosixAioState;
-
-
-static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-static pthread_t thread_id;
-static pthread_attr_t attr;
-static int max_threads = 64;
-static int cur_threads = 0;
-static int idle_threads = 0;
-static int new_threads = 0;     /* backlog of threads we need to create */
-static int pending_threads = 0; /* threads created but not running yet */
-static QEMUBH *new_thread_bh;
-static QTAILQ_HEAD(, qemu_paiocb) request_list;
-
 #ifdef CONFIG_PREADV
 static int preadv_present = 1;
 #else
 static int preadv_present = 0;
 #endif
 
-static void die2(int err, const char *what)
-{
-    fprintf(stderr, "%s failed: %s\n", what, strerror(err));
-    abort();
-}
-
-static void die(const char *what)
-{
-    die2(errno, what);
-}
-
-static void mutex_lock(pthread_mutex_t *mutex)
-{
-    int ret = pthread_mutex_lock(mutex);
-    if (ret) die2(ret, "pthread_mutex_lock");
-}
-
-static void mutex_unlock(pthread_mutex_t *mutex)
-{
-    int ret = pthread_mutex_unlock(mutex);
-    if (ret) die2(ret, "pthread_mutex_unlock");
-}
-
-static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
-                           struct timespec *ts)
-{
-    int ret = pthread_cond_timedwait(cond, mutex, ts);
-    if (ret && ret != ETIMEDOUT) die2(ret, "pthread_cond_timedwait");
-    return ret;
-}
-
-static void cond_signal(pthread_cond_t *cond)
-{
-    int ret = pthread_cond_signal(cond);
-    if (ret) die2(ret, "pthread_cond_signal");
-}
-
-static void thread_create(pthread_t *thread, pthread_attr_t *attr,
-                          void *(*start_routine)(void*), void *arg)
-{
-    int ret = pthread_create(thread, attr, start_routine, arg);
-    if (ret) die2(ret, "pthread_create");
-}
-
 static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
 {
     int ret;
@@ -310,286 +242,54 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
     return nbytes;
 }
 
-static void posix_aio_notify_event(void);
-
-static void *aio_thread(void *unused)
+static int aio_worker(void *arg)
 {
-    mutex_lock(&lock);
-    pending_threads--;
-    mutex_unlock(&lock);
-    do_spawn_thread();
-
-    while (1) {
-        struct qemu_paiocb *aiocb;
-        ssize_t ret = 0;
-        qemu_timeval tv;
-        struct timespec ts;
-
-        qemu_gettimeofday(&tv);
-        ts.tv_sec = tv.tv_sec + 10;
-        ts.tv_nsec = 0;
-
-        mutex_lock(&lock);
-
-        while (QTAILQ_EMPTY(&request_list) &&
-               !(ret == ETIMEDOUT)) {
-            idle_threads++;
-            ret = cond_timedwait(&cond, &lock, &ts);
-            idle_threads--;
+    struct qemu_paiocb *aiocb = arg;
+    ssize_t ret = 0;
+
+    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
+    case QEMU_AIO_READ:
+        ret = handle_aiocb_rw(aiocb);
+        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
+            /* A short read means that we have reached EOF. Pad the buffer
+             * with zeros for bytes after EOF. */
+            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
+                       0, aiocb->aio_nbytes - ret);
+
+            ret = aiocb->aio_nbytes;
         }
-
-        if (QTAILQ_EMPTY(&request_list))
-            break;
-
-        aiocb = QTAILQ_FIRST(&request_list);
-        QTAILQ_REMOVE(&request_list, aiocb, node);
-        aiocb->active = 1;
-        mutex_unlock(&lock);
-
-        switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
-        case QEMU_AIO_READ:
-            ret = handle_aiocb_rw(aiocb);
-            if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
-                /* A short read means that we have reached EOF. Pad the buffer
-                 * with zeros for bytes after EOF. */
-                iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
-                           0, aiocb->aio_nbytes - ret);
-
-                ret = aiocb->aio_nbytes;
-            }
-            break;
-        case QEMU_AIO_WRITE:
-            ret = handle_aiocb_rw(aiocb);
-            break;
-        case QEMU_AIO_FLUSH:
-            ret = handle_aiocb_flush(aiocb);
-            break;
-        case QEMU_AIO_IOCTL:
-            ret = handle_aiocb_ioctl(aiocb);
-            break;
-        default:
-            fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+        if (ret == aiocb->aio_nbytes) {
+            ret = 0;
+        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
             ret = -EINVAL;
-            break;
         }
-
-        mutex_lock(&lock);
-        aiocb->ret = ret;
-        mutex_unlock(&lock);
-
-        posix_aio_notify_event();
-    }
-
-    cur_threads--;
-    mutex_unlock(&lock);
-
-    return NULL;
-}
-
-static void do_spawn_thread(void)
-{
-    sigset_t set, oldset;
-
-    mutex_lock(&lock);
-    if (!new_threads) {
-        mutex_unlock(&lock);
-        return;
-    }
-
-    new_threads--;
-    pending_threads++;
-
-    mutex_unlock(&lock);
-
-    /* block all signals */
-    if (sigfillset(&set)) die("sigfillset");
-    if (sigprocmask(SIG_SETMASK, &set, &oldset)) die("sigprocmask");
-
-    thread_create(&thread_id, &attr, aio_thread, NULL);
-
-    if (sigprocmask(SIG_SETMASK, &oldset, NULL)) die("sigprocmask restore");
-}
-
-static void spawn_thread_bh_fn(void *opaque)
-{
-    do_spawn_thread();
-}
-
-static void spawn_thread(void)
-{
-    cur_threads++;
-    new_threads++;
-    /* If there are threads being created, they will spawn new workers, so
-     * we don't spend time creating many threads in a loop holding a mutex or
-     * starving the current vcpu.
-     *
-     * If there are no idle threads, ask the main thread to create one, so we
-     * inherit the correct affinity instead of the vcpu affinity.
-     */
-    if (!pending_threads) {
-        qemu_bh_schedule(new_thread_bh);
-    }
-}
-
-static void qemu_paio_submit(struct qemu_paiocb *aiocb)
-{
-    aiocb->ret = -EINPROGRESS;
-    aiocb->active = 0;
-    mutex_lock(&lock);
-    if (idle_threads == 0 && cur_threads < max_threads)
-        spawn_thread();
-    QTAILQ_INSERT_TAIL(&request_list, aiocb, node);
-    mutex_unlock(&lock);
-    cond_signal(&cond);
-}
-
-static ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
-{
-    ssize_t ret;
-
-    mutex_lock(&lock);
-    ret = aiocb->ret;
-    mutex_unlock(&lock);
-
-    return ret;
-}
-
-static int qemu_paio_error(struct qemu_paiocb *aiocb)
-{
-    ssize_t ret = qemu_paio_return(aiocb);
-
-    if (ret < 0)
-        ret = -ret;
-    else
-        ret = 0;
-
-    return ret;
-}
-
-static void posix_aio_read(void *opaque)
-{
-    PosixAioState *s = opaque;
-    struct qemu_paiocb *acb, **pacb;
-    int ret;
-    ssize_t len;
-
-    /* read all bytes from signal pipe */
-    for (;;) {
-        char bytes[16];
-
-        len = read(s->rfd, bytes, sizeof(bytes));
-        if (len == -1 && errno == EINTR)
-            continue; /* try again */
-        if (len == sizeof(bytes))
-            continue; /* more to read */
         break;
-    }
-
-    for(;;) {
-        pacb = &s->first_aio;
-        for(;;) {
-            acb = *pacb;
-            if (!acb)
-                return;
-
-            ret = qemu_paio_error(acb);
-            if (ret == ECANCELED) {
-                /* remove the request */
-                *pacb = acb->next;
-                qemu_aio_release(acb);
-            } else if (ret != EINPROGRESS) {
-                /* end of aio */
-                if (ret == 0) {
-                    ret = qemu_paio_return(acb);
-                    if (ret == acb->aio_nbytes)
-                        ret = 0;
-                    else
-                        ret = -EINVAL;
-                } else {
-                    ret = -ret;
-                }
-
-                trace_paio_complete(acb, acb->common.opaque, ret);
-
-                /* remove the request */
-                *pacb = acb->next;
-                /* call the callback */
-                acb->common.cb(acb->common.opaque, ret);
-                qemu_aio_release(acb);
-                break;
-            } else {
-                pacb = &acb->next;
-            }
-        }
-    }
-}
-
-static int posix_aio_flush(void *opaque)
-{
-    PosixAioState *s = opaque;
-    return !!s->first_aio;
-}
-
-static PosixAioState *posix_aio_state;
-
-static void posix_aio_notify_event(void)
-{
-    char byte = 0;
-    ssize_t ret;
-
-    ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
-    if (ret < 0 && errno != EAGAIN)
-        die("write()");
-}
-
-static void paio_remove(struct qemu_paiocb *acb)
-{
-    struct qemu_paiocb **pacb;
-
-    /* remove the callback from the queue */
-    pacb = &posix_aio_state->first_aio;
-    for(;;) {
-        if (*pacb == NULL) {
-            fprintf(stderr, "paio_remove: aio request not found!\n");
-            break;
-        } else if (*pacb == acb) {
-            *pacb = acb->next;
-            qemu_aio_release(acb);
-            break;
+    case QEMU_AIO_WRITE:
+        ret = handle_aiocb_rw(aiocb);
+        if (ret == aiocb->aio_nbytes) {
+            ret = 0;
+        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
+            ret = -EINVAL;
         }
-        pacb = &(*pacb)->next;
-    }
-}
-
-static void paio_cancel(BlockDriverAIOCB *blockacb)
-{
-    struct qemu_paiocb *acb = (struct qemu_paiocb *)blockacb;
-    int active = 0;
-
-    trace_paio_cancel(acb, acb->common.opaque);
-
-    mutex_lock(&lock);
-    if (!acb->active) {
-        QTAILQ_REMOVE(&request_list, acb, node);
-        acb->ret = -ECANCELED;
-    } else if (acb->ret == -EINPROGRESS) {
-        active = 1;
-    }
-    mutex_unlock(&lock);
-
-    if (active) {
-        /* fail safe: if the aio could not be canceled, we wait for
-           it */
-        while (qemu_paio_error(acb) == EINPROGRESS)
-            ;
+        break;
+    case QEMU_AIO_FLUSH:
+        ret = handle_aiocb_flush(aiocb);
+        break;
+    case QEMU_AIO_IOCTL:
+        ret = handle_aiocb_ioctl(aiocb);
+        break;
+    default:
+        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+        ret = -EINVAL;
+        break;
     }
 
-    paio_remove(acb);
+    qemu_aio_release(aiocb);
+    return ret;
 }
 
 static AIOPool raw_aio_pool = {
     .aiocb_size         = sizeof(struct qemu_paiocb),
-    .cancel             = paio_cancel,
 };
 
 BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
@@ -609,12 +309,8 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
     acb->aio_nbytes = nb_sectors * 512;
     acb->aio_offset = sector_num * 512;
 
-    acb->next = posix_aio_state->first_aio;
-    posix_aio_state->first_aio = acb;
-
     trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
-    qemu_paio_submit(acb);
-    return &acb->common;
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
 BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
@@ -630,50 +326,5 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
     acb->aio_ioctl_buf = buf;
     acb->aio_ioctl_cmd = req;
 
-    acb->next = posix_aio_state->first_aio;
-    posix_aio_state->first_aio = acb;
-
-    qemu_paio_submit(acb);
-    return &acb->common;
-}
-
-int paio_init(void)
-{
-    PosixAioState *s;
-    int fds[2];
-    int ret;
-
-    if (posix_aio_state)
-        return 0;
-
-    s = g_malloc(sizeof(PosixAioState));
-
-    s->first_aio = NULL;
-    if (qemu_pipe(fds) == -1) {
-        fprintf(stderr, "failed to create pipe\n");
-        g_free(s);
-        return -1;
-    }
-
-    s->rfd = fds[0];
-    s->wfd = fds[1];
-
-    fcntl(s->rfd, F_SETFL, O_NONBLOCK);
-    fcntl(s->wfd, F_SETFL, O_NONBLOCK);
-
-    qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
-
-    ret = pthread_attr_init(&attr);
-    if (ret)
-        die2(ret, "pthread_attr_init");
-
-    ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-    if (ret)
-        die2(ret, "pthread_attr_setdetachstate");
-
-    QTAILQ_INIT(&request_list);
-    new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL);
-
-    posix_aio_state = s;
-    return 0;
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
-- 
cgit v1.2.3


From de81a169366c2e3e0c47d0be637cc450b71aac67 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 25 May 2012 11:46:27 +0200
Subject: raw: merge posix-aio-compat.c into block/raw-posix.c

Making the qemu_paiocb specific to raw devices will let us access members
of the BDRVRawState arbitrarily.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile.objs         |   1 -
 block/raw-posix-aio.h |   8 --
 block/raw-posix.c     | 294 ++++++++++++++++++++++++++++++++++++++++++++
 posix-aio-compat.c    | 330 --------------------------------------------------
 4 files changed, 294 insertions(+), 339 deletions(-)
 delete mode 100644 posix-aio-compat.c

diff --git a/Makefile.objs b/Makefile.objs
index f8ae0316b8..35c2355e82 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -45,7 +45,6 @@ block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
 block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o
 block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
-block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
 block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
diff --git a/block/raw-posix-aio.h b/block/raw-posix-aio.h
index 6725135dd4..c714367401 100644
--- a/block/raw-posix-aio.h
+++ b/block/raw-posix-aio.h
@@ -27,14 +27,6 @@
 #define QEMU_AIO_MISALIGNED   0x1000
 
 
-/* posix-aio-compat.c - thread pool based implementation */
-BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type);
-BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque);
-
 /* linux-aio.c - Linux native implementation */
 void *laio_init(void);
 BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 9ae2c505a8..4d6d5df5bc 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -27,6 +27,9 @@
 #include "qemu-log.h"
 #include "block_int.h"
 #include "module.h"
+#include "trace.h"
+#include "thread-pool.h"
+#include "iov.h"
 #include "block/raw-posix-aio.h"
 
 #if defined(__APPLE__) && (__MACH__)
@@ -149,6 +152,20 @@ typedef struct BDRVRawReopenState {
 static int fd_open(BlockDriverState *bs);
 static int64_t raw_getlength(BlockDriverState *bs);
 
+typedef struct RawPosixAIOData {
+    BlockDriverState *bs;
+    int aio_fildes;
+    union {
+        struct iovec *aio_iov;
+        void *aio_ioctl_buf;
+    };
+    int aio_niov;
+    size_t aio_nbytes;
+#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
+    off_t aio_offset;
+    int aio_type;
+} RawPosixAIOData;
+
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static int cdrom_reopen(BlockDriverState *bs);
 #endif
@@ -426,6 +443,283 @@ static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
     return 1;
 }
 
+static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
+{
+    int ret;
+
+    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
+    if (ret == -1) {
+        return -errno;
+    }
+
+    /*
+     * This looks weird, but the aio code only considers a request
+     * successful if it has written the full number of bytes.
+     *
+     * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
+     * so in fact we return the ioctl command here to make posix_aio_read()
+     * happy..
+     */
+    return aiocb->aio_nbytes;
+}
+
+static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
+{
+    int ret;
+
+    ret = qemu_fdatasync(aiocb->aio_fildes);
+    if (ret == -1) {
+        return -errno;
+    }
+    return 0;
+}
+
+#ifdef CONFIG_PREADV
+
+static bool preadv_present = true;
+
+static ssize_t
+qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return preadv(fd, iov, nr_iov, offset);
+}
+
+static ssize_t
+qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return pwritev(fd, iov, nr_iov, offset);
+}
+
+#else
+
+static bool preadv_present = false;
+
+static ssize_t
+qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return -ENOSYS;
+}
+
+static ssize_t
+qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return -ENOSYS;
+}
+
+#endif
+
+static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
+{
+    ssize_t len;
+
+    do {
+        if (aiocb->aio_type & QEMU_AIO_WRITE)
+            len = qemu_pwritev(aiocb->aio_fildes,
+                               aiocb->aio_iov,
+                               aiocb->aio_niov,
+                               aiocb->aio_offset);
+         else
+            len = qemu_preadv(aiocb->aio_fildes,
+                              aiocb->aio_iov,
+                              aiocb->aio_niov,
+                              aiocb->aio_offset);
+    } while (len == -1 && errno == EINTR);
+
+    if (len == -1) {
+        return -errno;
+    }
+    return len;
+}
+
+/*
+ * Read/writes the data to/from a given linear buffer.
+ *
+ * Returns the number of bytes handles or -errno in case of an error. Short
+ * reads are only returned if the end of the file is reached.
+ */
+static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
+{
+    ssize_t offset = 0;
+    ssize_t len;
+
+    while (offset < aiocb->aio_nbytes) {
+        if (aiocb->aio_type & QEMU_AIO_WRITE) {
+            len = pwrite(aiocb->aio_fildes,
+                         (const char *)buf + offset,
+                         aiocb->aio_nbytes - offset,
+                         aiocb->aio_offset + offset);
+        } else {
+            len = pread(aiocb->aio_fildes,
+                        buf + offset,
+                        aiocb->aio_nbytes - offset,
+                        aiocb->aio_offset + offset);
+        }
+        if (len == -1 && errno == EINTR) {
+            continue;
+        } else if (len == -1) {
+            offset = -errno;
+            break;
+        } else if (len == 0) {
+            break;
+        }
+        offset += len;
+    }
+
+    return offset;
+}
+
+static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
+{
+    ssize_t nbytes;
+    char *buf;
+
+    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
+        /*
+         * If there is just a single buffer, and it is properly aligned
+         * we can just use plain pread/pwrite without any problems.
+         */
+        if (aiocb->aio_niov == 1) {
+             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
+        }
+        /*
+         * We have more than one iovec, and all are properly aligned.
+         *
+         * Try preadv/pwritev first and fall back to linearizing the
+         * buffer if it's not supported.
+         */
+        if (preadv_present) {
+            nbytes = handle_aiocb_rw_vector(aiocb);
+            if (nbytes == aiocb->aio_nbytes ||
+                (nbytes < 0 && nbytes != -ENOSYS)) {
+                return nbytes;
+            }
+            preadv_present = false;
+        }
+
+        /*
+         * XXX(hch): short read/write.  no easy way to handle the reminder
+         * using these interfaces.  For now retry using plain
+         * pread/pwrite?
+         */
+    }
+
+    /*
+     * Ok, we have to do it the hard way, copy all segments into
+     * a single aligned buffer.
+     */
+    buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
+    if (aiocb->aio_type & QEMU_AIO_WRITE) {
+        char *p = buf;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov; ++i) {
+            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
+            p += aiocb->aio_iov[i].iov_len;
+        }
+    }
+
+    nbytes = handle_aiocb_rw_linear(aiocb, buf);
+    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
+        char *p = buf;
+        size_t count = aiocb->aio_nbytes, copy;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov && count; ++i) {
+            copy = count;
+            if (copy > aiocb->aio_iov[i].iov_len) {
+                copy = aiocb->aio_iov[i].iov_len;
+            }
+            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+            p     += copy;
+            count -= copy;
+        }
+    }
+    qemu_vfree(buf);
+
+    return nbytes;
+}
+
+static int aio_worker(void *arg)
+{
+    RawPosixAIOData *aiocb = arg;
+    ssize_t ret = 0;
+
+    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
+    case QEMU_AIO_READ:
+        ret = handle_aiocb_rw(aiocb);
+        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
+            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
+                      0, aiocb->aio_nbytes - ret);
+
+            ret = aiocb->aio_nbytes;
+        }
+        if (ret == aiocb->aio_nbytes) {
+            ret = 0;
+        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_WRITE:
+        ret = handle_aiocb_rw(aiocb);
+        if (ret == aiocb->aio_nbytes) {
+            ret = 0;
+        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_FLUSH:
+        ret = handle_aiocb_flush(aiocb);
+        break;
+    case QEMU_AIO_IOCTL:
+        ret = handle_aiocb_ioctl(aiocb);
+        break;
+    default:
+        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+        ret = -EINVAL;
+        break;
+    }
+
+    g_slice_free(RawPosixAIOData, aiocb);
+    return ret;
+}
+
+static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+
+    acb->bs = bs;
+    acb->aio_type = type;
+    acb->aio_fildes = fd;
+
+    if (qiov) {
+        acb->aio_iov = qiov->iov;
+        acb->aio_niov = qiov->niov;
+    }
+    acb->aio_nbytes = nb_sectors * 512;
+    acb->aio_offset = sector_num * 512;
+
+    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
+static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
+        unsigned long int req, void *buf,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+
+    acb->bs = bs;
+    acb->aio_type = QEMU_AIO_IOCTL;
+    acb->aio_fildes = fd;
+    acb->aio_offset = 0;
+    acb->aio_ioctl_buf = buf;
+    acb->aio_ioctl_cmd = req;
+
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
 static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type)
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
deleted file mode 100644
index 4a1e3d3662..0000000000
--- a/posix-aio-compat.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * QEMU posix-aio emulation
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <pthread.h>
-#include <unistd.h>
-#include <errno.h>
-#include <time.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "qemu-queue.h"
-#include "osdep.h"
-#include "sysemu.h"
-#include "qemu-common.h"
-#include "trace.h"
-#include "thread-pool.h"
-#include "block_int.h"
-#include "iov.h"
-
-#include "block/raw-posix-aio.h"
-
-struct qemu_paiocb {
-    BlockDriverAIOCB common;
-    int aio_fildes;
-    union {
-        struct iovec *aio_iov;
-        void *aio_ioctl_buf;
-    };
-    int aio_niov;
-    size_t aio_nbytes;
-#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
-    off_t aio_offset;
-    int aio_type;
-};
-
-#ifdef CONFIG_PREADV
-static int preadv_present = 1;
-#else
-static int preadv_present = 0;
-#endif
-
-static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
-{
-    int ret;
-
-    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
-    if (ret == -1)
-        return -errno;
-
-    /*
-     * This looks weird, but the aio code only considers a request
-     * successful if it has written the full number of bytes.
-     *
-     * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
-     * so in fact we return the ioctl command here to make posix_aio_read()
-     * happy..
-     */
-    return aiocb->aio_nbytes;
-}
-
-static ssize_t handle_aiocb_flush(struct qemu_paiocb *aiocb)
-{
-    int ret;
-
-    ret = qemu_fdatasync(aiocb->aio_fildes);
-    if (ret == -1)
-        return -errno;
-    return 0;
-}
-
-#ifdef CONFIG_PREADV
-
-static ssize_t
-qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return preadv(fd, iov, nr_iov, offset);
-}
-
-static ssize_t
-qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return pwritev(fd, iov, nr_iov, offset);
-}
-
-#else
-
-static ssize_t
-qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return -ENOSYS;
-}
-
-static ssize_t
-qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return -ENOSYS;
-}
-
-#endif
-
-static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
-{
-    ssize_t len;
-
-    do {
-        if (aiocb->aio_type & QEMU_AIO_WRITE)
-            len = qemu_pwritev(aiocb->aio_fildes,
-                               aiocb->aio_iov,
-                               aiocb->aio_niov,
-                               aiocb->aio_offset);
-         else
-            len = qemu_preadv(aiocb->aio_fildes,
-                              aiocb->aio_iov,
-                              aiocb->aio_niov,
-                              aiocb->aio_offset);
-    } while (len == -1 && errno == EINTR);
-
-    if (len == -1)
-        return -errno;
-    return len;
-}
-
-/*
- * Read/writes the data to/from a given linear buffer.
- *
- * Returns the number of bytes handles or -errno in case of an error. Short
- * reads are only returned if the end of the file is reached.
- */
-static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
-{
-    ssize_t offset = 0;
-    ssize_t len;
-
-    while (offset < aiocb->aio_nbytes) {
-         if (aiocb->aio_type & QEMU_AIO_WRITE)
-             len = pwrite(aiocb->aio_fildes,
-                          (const char *)buf + offset,
-                          aiocb->aio_nbytes - offset,
-                          aiocb->aio_offset + offset);
-         else
-             len = pread(aiocb->aio_fildes,
-                         buf + offset,
-                         aiocb->aio_nbytes - offset,
-                         aiocb->aio_offset + offset);
-
-         if (len == -1 && errno == EINTR)
-             continue;
-         else if (len == -1) {
-             offset = -errno;
-             break;
-         } else if (len == 0)
-             break;
-
-         offset += len;
-    }
-
-    return offset;
-}
-
-static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
-{
-    ssize_t nbytes;
-    char *buf;
-
-    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
-        /*
-         * If there is just a single buffer, and it is properly aligned
-         * we can just use plain pread/pwrite without any problems.
-         */
-        if (aiocb->aio_niov == 1)
-             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
-
-        /*
-         * We have more than one iovec, and all are properly aligned.
-         *
-         * Try preadv/pwritev first and fall back to linearizing the
-         * buffer if it's not supported.
-         */
-        if (preadv_present) {
-            nbytes = handle_aiocb_rw_vector(aiocb);
-            if (nbytes == aiocb->aio_nbytes)
-                return nbytes;
-            if (nbytes < 0 && nbytes != -ENOSYS)
-                return nbytes;
-            preadv_present = 0;
-        }
-
-        /*
-         * XXX(hch): short read/write.  no easy way to handle the reminder
-         * using these interfaces.  For now retry using plain
-         * pread/pwrite?
-         */
-    }
-
-    /*
-     * Ok, we have to do it the hard way, copy all segments into
-     * a single aligned buffer.
-     */
-    buf = qemu_blockalign(aiocb->common.bs, aiocb->aio_nbytes);
-    if (aiocb->aio_type & QEMU_AIO_WRITE) {
-        char *p = buf;
-        int i;
-
-        for (i = 0; i < aiocb->aio_niov; ++i) {
-            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
-            p += aiocb->aio_iov[i].iov_len;
-        }
-    }
-
-    nbytes = handle_aiocb_rw_linear(aiocb, buf);
-    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
-        char *p = buf;
-        size_t count = aiocb->aio_nbytes, copy;
-        int i;
-
-        for (i = 0; i < aiocb->aio_niov && count; ++i) {
-            copy = count;
-            if (copy > aiocb->aio_iov[i].iov_len)
-                copy = aiocb->aio_iov[i].iov_len;
-            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
-            p     += copy;
-            count -= copy;
-        }
-    }
-    qemu_vfree(buf);
-
-    return nbytes;
-}
-
-static int aio_worker(void *arg)
-{
-    struct qemu_paiocb *aiocb = arg;
-    ssize_t ret = 0;
-
-    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
-    case QEMU_AIO_READ:
-        ret = handle_aiocb_rw(aiocb);
-        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
-            /* A short read means that we have reached EOF. Pad the buffer
-             * with zeros for bytes after EOF. */
-            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
-                       0, aiocb->aio_nbytes - ret);
-
-            ret = aiocb->aio_nbytes;
-        }
-        if (ret == aiocb->aio_nbytes) {
-            ret = 0;
-        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
-            ret = -EINVAL;
-        }
-        break;
-    case QEMU_AIO_WRITE:
-        ret = handle_aiocb_rw(aiocb);
-        if (ret == aiocb->aio_nbytes) {
-            ret = 0;
-        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
-            ret = -EINVAL;
-        }
-        break;
-    case QEMU_AIO_FLUSH:
-        ret = handle_aiocb_flush(aiocb);
-        break;
-    case QEMU_AIO_IOCTL:
-        ret = handle_aiocb_ioctl(aiocb);
-        break;
-    default:
-        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
-        ret = -EINVAL;
-        break;
-    }
-
-    qemu_aio_release(aiocb);
-    return ret;
-}
-
-static AIOPool raw_aio_pool = {
-    .aiocb_size         = sizeof(struct qemu_paiocb),
-};
-
-BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type)
-{
-    struct qemu_paiocb *acb;
-
-    acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
-    acb->aio_type = type;
-    acb->aio_fildes = fd;
-
-    if (qiov) {
-        acb->aio_iov = qiov->iov;
-        acb->aio_niov = qiov->niov;
-    }
-    acb->aio_nbytes = nb_sectors * 512;
-    acb->aio_offset = sector_num * 512;
-
-    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
-    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
-}
-
-BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    struct qemu_paiocb *acb;
-
-    acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
-    acb->aio_type = QEMU_AIO_IOCTL;
-    acb->aio_fildes = fd;
-    acb->aio_offset = 0;
-    acb->aio_ioctl_buf = buf;
-    acb->aio_ioctl_cmd = req;
-
-    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
-}
-- 
cgit v1.2.3


From 9f8540ecef6fc5ededa825d766c8bcd1987884dd Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 9 Jun 2012 10:57:37 +0200
Subject: raw-posix: rename raw-posix-aio.h, hide unavailable prototypes

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-aio.h       | 38 ++++++++++++++++++++++++++++++++++++++
 block/raw-posix-aio.h | 36 ------------------------------------
 block/raw-posix.c     |  2 +-
 linux-aio.c           |  2 +-
 4 files changed, 40 insertions(+), 38 deletions(-)
 create mode 100644 block/raw-aio.h
 delete mode 100644 block/raw-posix-aio.h

diff --git a/block/raw-aio.h b/block/raw-aio.h
new file mode 100644
index 0000000000..b3bb07377e
--- /dev/null
+++ b/block/raw-aio.h
@@ -0,0 +1,38 @@
+/*
+ * Declarations for AIO in the raw protocol
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#ifndef QEMU_RAW_AIO_H
+#define QEMU_RAW_AIO_H
+
+/* AIO request types */
+#define QEMU_AIO_READ         0x0001
+#define QEMU_AIO_WRITE        0x0002
+#define QEMU_AIO_IOCTL        0x0004
+#define QEMU_AIO_FLUSH        0x0008
+#define QEMU_AIO_TYPE_MASK \
+	(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH)
+
+/* AIO flags */
+#define QEMU_AIO_MISALIGNED   0x1000
+
+
+/* linux-aio.c - Linux native implementation */
+#ifdef CONFIG_LINUX_AIO
+void *laio_init(void);
+BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type);
+#endif
+
+#endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-posix-aio.h b/block/raw-posix-aio.h
deleted file mode 100644
index c714367401..0000000000
--- a/block/raw-posix-aio.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * QEMU Posix block I/O backend AIO support
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-#ifndef QEMU_RAW_POSIX_AIO_H
-#define QEMU_RAW_POSIX_AIO_H
-
-/* AIO request types */
-#define QEMU_AIO_READ         0x0001
-#define QEMU_AIO_WRITE        0x0002
-#define QEMU_AIO_IOCTL        0x0004
-#define QEMU_AIO_FLUSH        0x0008
-#define QEMU_AIO_TYPE_MASK \
-	(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH)
-
-/* AIO flags */
-#define QEMU_AIO_MISALIGNED   0x1000
-
-
-/* linux-aio.c - Linux native implementation */
-void *laio_init(void);
-BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type);
-
-#endif /* QEMU_RAW_POSIX_AIO_H */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 4d6d5df5bc..f2f0404f6f 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -30,7 +30,7 @@
 #include "trace.h"
 #include "thread-pool.h"
 #include "iov.h"
-#include "block/raw-posix-aio.h"
+#include "raw-aio.h"
 
 #if defined(__APPLE__) && (__MACH__)
 #include <paths.h>
diff --git a/linux-aio.c b/linux-aio.c
index d1afb460c2..6ca984dbe8 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -10,7 +10,7 @@
 #include "qemu-common.h"
 #include "qemu-aio.h"
 #include "qemu-queue.h"
-#include "block/raw-posix-aio.h"
+#include "block/raw-aio.h"
 #include "event_notifier.h"
 
 #include <libaio.h>
-- 
cgit v1.2.3


From fc4edb84bfc49e4ed6c1a54c8ac037e9a7479fc8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 9 Jun 2012 04:48:28 +0200
Subject: raw-win32: add emulated AIO support

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-win32.c | 187 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 138 insertions(+), 49 deletions(-)

diff --git a/block/raw-win32.c b/block/raw-win32.c
index 78c830648b..ffd86e3f38 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -25,6 +25,10 @@
 #include "qemu-timer.h"
 #include "block_int.h"
 #include "module.h"
+#include "raw-aio.h"
+#include "trace.h"
+#include "thread-pool.h"
+#include "iov.h"
 #include <windows.h>
 #include <winioctl.h>
 
@@ -32,12 +36,127 @@
 #define FTYPE_CD     1
 #define FTYPE_HARDDISK 2
 
+typedef struct RawWin32AIOData {
+    BlockDriverState *bs;
+    HANDLE hfile;
+    struct iovec *aio_iov;
+    int aio_niov;
+    size_t aio_nbytes;
+    off64_t aio_offset;
+    int aio_type;
+} RawWin32AIOData;
+
 typedef struct BDRVRawState {
     HANDLE hfile;
     int type;
     char drive_path[16]; /* format: "d:\" */
 } BDRVRawState;
 
+/*
+ * Read/writes the data to/from a given linear buffer.
+ *
+ * Returns the number of bytes handles or -errno in case of an error. Short
+ * reads are only returned if the end of the file is reached.
+ */
+static size_t handle_aiocb_rw(RawWin32AIOData *aiocb)
+{
+    size_t offset = 0;
+    int i;
+
+    for (i = 0; i < aiocb->aio_niov; i++) {
+        OVERLAPPED ov;
+        DWORD ret, ret_count, len;
+
+        memset(&ov, 0, sizeof(ov));
+        ov.Offset = (aiocb->aio_offset + offset);
+        ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32;
+        len = aiocb->aio_iov[i].iov_len;
+        if (aiocb->aio_type & QEMU_AIO_WRITE) {
+            ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
+                            len, &ret_count, &ov);
+        } else {
+            ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
+                           len, &ret_count, &ov);
+        }
+        if (!ret) {
+            ret_count = 0;
+        }
+        if (ret_count != len) {
+            break;
+        }
+        offset += len;
+    }
+
+    return offset;
+}
+
+static int aio_worker(void *arg)
+{
+    RawWin32AIOData *aiocb = arg;
+    ssize_t ret = 0;
+    size_t count;
+
+    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
+    case QEMU_AIO_READ:
+        count = handle_aiocb_rw(aiocb);
+        if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
+            /* A short read means that we have reached EOF. Pad the buffer
+             * with zeros for bytes after EOF. */
+            iov_memset(aiocb->aio_iov, aiocb->aio_niov, count,
+                      0, aiocb->aio_nbytes - count);
+
+            count = aiocb->aio_nbytes;
+        }
+        if (count == aiocb->aio_nbytes) {
+            ret = 0;
+        } else {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_WRITE:
+        count = handle_aiocb_rw(aiocb);
+        if (count == aiocb->aio_nbytes) {
+            count = 0;
+        } else {
+            count = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_FLUSH:
+        if (!FlushFileBuffers(aiocb->hfile)) {
+            return -EIO;
+        }
+        break;
+    default:
+        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+        ret = -EINVAL;
+        break;
+    }
+
+    g_slice_free(RawWin32AIOData, aiocb);
+    return ret;
+}
+
+static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
+
+    acb->bs = bs;
+    acb->hfile = hfile;
+    acb->aio_type = type;
+
+    if (qiov) {
+        acb->aio_iov = qiov->iov;
+        acb->aio_niov = qiov->niov;
+    }
+    acb->aio_nbytes = nb_sectors * 512;
+    acb->aio_offset = sector_num * 512;
+
+    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
 int qemu_ftruncate64(int fd, int64_t length)
 {
     LARGE_INTEGER li;
@@ -117,59 +236,29 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
     return 0;
 }
 
-static int raw_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    OVERLAPPED ov;
-    DWORD ret_count;
-    int ret;
-    int64_t offset = sector_num * 512;
-    int count = nb_sectors * 512;
-
-    memset(&ov, 0, sizeof(ov));
-    ov.Offset = offset;
-    ov.OffsetHigh = offset >> 32;
-    ret = ReadFile(s->hfile, buf, count, &ret_count, &ov);
-    if (!ret)
-        return ret_count;
-    if (ret_count == count)
-        ret_count = 0;
-    return ret_count;
+    return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+                       cb, opaque, QEMU_AIO_READ);
 }
 
-static int raw_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+                          BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    OVERLAPPED ov;
-    DWORD ret_count;
-    int ret;
-    int64_t offset = sector_num * 512;
-    int count = nb_sectors * 512;
-
-    memset(&ov, 0, sizeof(ov));
-    ov.Offset = offset;
-    ov.OffsetHigh = offset >> 32;
-    ret = WriteFile(s->hfile, buf, count, &ret_count, &ov);
-    if (!ret)
-        return ret_count;
-    if (ret_count == count)
-        ret_count = 0;
-    return ret_count;
+    return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+                       cb, opaque, QEMU_AIO_WRITE);
 }
 
-static int raw_flush(BlockDriverState *bs)
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    int ret;
-
-    ret = FlushFileBuffers(s->hfile);
-    if (ret == 0) {
-        return -EIO;
-    }
-
-    return 0;
+    return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
 }
 
 static void raw_close(BlockDriverState *bs)
@@ -290,9 +379,9 @@ static BlockDriver bdrv_file = {
     .bdrv_close		= raw_close,
     .bdrv_create	= raw_create,
 
-    .bdrv_read              = raw_read,
-    .bdrv_write             = raw_write,
-    .bdrv_co_flush_to_disk  = raw_flush,
+    .bdrv_aio_readv     = raw_aio_readv,
+    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush     = raw_aio_flush,
 
     .bdrv_truncate	= raw_truncate,
     .bdrv_getlength	= raw_getlength,
@@ -413,9 +502,9 @@ static BlockDriver bdrv_host_device = {
     .bdrv_close		= raw_close,
     .bdrv_has_zero_init = hdev_has_zero_init,
 
-    .bdrv_read              = raw_read,
-    .bdrv_write             = raw_write,
-    .bdrv_co_flush_to_disk  = raw_flush,
+    .bdrv_aio_readv     = raw_aio_readv,
+    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush     = raw_aio_flush,
 
     .bdrv_getlength	= raw_getlength,
     .bdrv_get_allocated_file_size
-- 
cgit v1.2.3


From 10fb6e06825743bd517d4b5bb0e7b9e05e0fe92c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 26 Oct 2012 11:27:45 +0200
Subject: raw-posix: move linux-aio.c to block/

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile.objs       |   1 -
 block/Makefile.objs |   1 +
 block/linux-aio.c   | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 linux-aio.c         | 216 ----------------------------------------------------
 4 files changed, 217 insertions(+), 217 deletions(-)
 create mode 100644 block/linux-aio.c
 delete mode 100644 linux-aio.c

diff --git a/Makefile.objs b/Makefile.objs
index 35c2355e82..2b5427ee5f 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -47,7 +47,6 @@ block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
 block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
-block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += block/
 block-obj-y += $(qapi-obj-y) qapi-types.o qapi-visit.o
 
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 684765bf63..771d3414d9 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -5,6 +5,7 @@ block-obj-y += qed-check.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
+block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
 ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o sheepdog.o
diff --git a/block/linux-aio.c b/block/linux-aio.c
new file mode 100644
index 0000000000..6ca984dbe8
--- /dev/null
+++ b/block/linux-aio.c
@@ -0,0 +1,216 @@
+/*
+ * Linux native AIO support.
+ *
+ * Copyright (C) 2009 IBM, Corp.
+ * Copyright (C) 2009 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu-common.h"
+#include "qemu-aio.h"
+#include "qemu-queue.h"
+#include "block/raw-aio.h"
+#include "event_notifier.h"
+
+#include <libaio.h>
+
+/*
+ * Queue size (per-device).
+ *
+ * XXX: eventually we need to communicate this to the guest and/or make it
+ *      tunable by the guest.  If we get more outstanding requests at a time
+ *      than this we will get EAGAIN from io_submit which is communicated to
+ *      the guest as an I/O error.
+ */
+#define MAX_EVENTS 128
+
+struct qemu_laiocb {
+    BlockDriverAIOCB common;
+    struct qemu_laio_state *ctx;
+    struct iocb iocb;
+    ssize_t ret;
+    size_t nbytes;
+    QEMUIOVector *qiov;
+    bool is_read;
+    QLIST_ENTRY(qemu_laiocb) node;
+};
+
+struct qemu_laio_state {
+    io_context_t ctx;
+    EventNotifier e;
+    int count;
+};
+
+static inline ssize_t io_event_ret(struct io_event *ev)
+{
+    return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
+}
+
+/*
+ * Completes an AIO request (calls the callback and frees the ACB).
+ */
+static void qemu_laio_process_completion(struct qemu_laio_state *s,
+    struct qemu_laiocb *laiocb)
+{
+    int ret;
+
+    s->count--;
+
+    ret = laiocb->ret;
+    if (ret != -ECANCELED) {
+        if (ret == laiocb->nbytes) {
+            ret = 0;
+        } else if (ret >= 0) {
+            /* Short reads mean EOF, pad with zeros. */
+            if (laiocb->is_read) {
+                qemu_iovec_memset(laiocb->qiov, ret, 0,
+                    laiocb->qiov->size - ret);
+            } else {
+                ret = -EINVAL;
+            }
+        }
+
+        laiocb->common.cb(laiocb->common.opaque, ret);
+    }
+
+    qemu_aio_release(laiocb);
+}
+
+static void qemu_laio_completion_cb(EventNotifier *e)
+{
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+
+    while (event_notifier_test_and_clear(&s->e)) {
+        struct io_event events[MAX_EVENTS];
+        struct timespec ts = { 0 };
+        int nevents, i;
+
+        do {
+            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
+        } while (nevents == -EINTR);
+
+        for (i = 0; i < nevents; i++) {
+            struct iocb *iocb = events[i].obj;
+            struct qemu_laiocb *laiocb =
+                    container_of(iocb, struct qemu_laiocb, iocb);
+
+            laiocb->ret = io_event_ret(&events[i]);
+            qemu_laio_process_completion(s, laiocb);
+        }
+    }
+}
+
+static int qemu_laio_flush_cb(EventNotifier *e)
+{
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+
+    return (s->count > 0) ? 1 : 0;
+}
+
+static void laio_cancel(BlockDriverAIOCB *blockacb)
+{
+    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
+    struct io_event event;
+    int ret;
+
+    if (laiocb->ret != -EINPROGRESS)
+        return;
+
+    /*
+     * Note that as of Linux 2.6.31 neither the block device code nor any
+     * filesystem implements cancellation of AIO request.
+     * Thus the polling loop below is the normal code path.
+     */
+    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
+    if (ret == 0) {
+        laiocb->ret = -ECANCELED;
+        return;
+    }
+
+    /*
+     * We have to wait for the iocb to finish.
+     *
+     * The only way to get the iocb status update is by polling the io context.
+     * We might be able to do this slightly more optimal by removing the
+     * O_NONBLOCK flag.
+     */
+    while (laiocb->ret == -EINPROGRESS) {
+        qemu_laio_completion_cb(&laiocb->ctx->e);
+    }
+}
+
+static AIOPool laio_pool = {
+    .aiocb_size         = sizeof(struct qemu_laiocb),
+    .cancel             = laio_cancel,
+};
+
+BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    struct qemu_laio_state *s = aio_ctx;
+    struct qemu_laiocb *laiocb;
+    struct iocb *iocbs;
+    off_t offset = sector_num * 512;
+
+    laiocb = qemu_aio_get(&laio_pool, bs, cb, opaque);
+    laiocb->nbytes = nb_sectors * 512;
+    laiocb->ctx = s;
+    laiocb->ret = -EINPROGRESS;
+    laiocb->is_read = (type == QEMU_AIO_READ);
+    laiocb->qiov = qiov;
+
+    iocbs = &laiocb->iocb;
+
+    switch (type) {
+    case QEMU_AIO_WRITE:
+        io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+	break;
+    case QEMU_AIO_READ:
+        io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
+	break;
+    /* Currently Linux kernel does not support other operations */
+    default:
+        fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
+                        __func__, type);
+        goto out_free_aiocb;
+    }
+    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
+    s->count++;
+
+    if (io_submit(s->ctx, 1, &iocbs) < 0)
+        goto out_dec_count;
+    return &laiocb->common;
+
+out_dec_count:
+    s->count--;
+out_free_aiocb:
+    qemu_aio_release(laiocb);
+    return NULL;
+}
+
+void *laio_init(void)
+{
+    struct qemu_laio_state *s;
+
+    s = g_malloc0(sizeof(*s));
+    if (event_notifier_init(&s->e, false) < 0) {
+        goto out_free_state;
+    }
+
+    if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
+        goto out_close_efd;
+    }
+
+    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
+                                qemu_laio_flush_cb);
+
+    return s;
+
+out_close_efd:
+    event_notifier_cleanup(&s->e);
+out_free_state:
+    g_free(s);
+    return NULL;
+}
diff --git a/linux-aio.c b/linux-aio.c
deleted file mode 100644
index 6ca984dbe8..0000000000
--- a/linux-aio.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Linux native AIO support.
- *
- * Copyright (C) 2009 IBM, Corp.
- * Copyright (C) 2009 Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-#include "qemu-common.h"
-#include "qemu-aio.h"
-#include "qemu-queue.h"
-#include "block/raw-aio.h"
-#include "event_notifier.h"
-
-#include <libaio.h>
-
-/*
- * Queue size (per-device).
- *
- * XXX: eventually we need to communicate this to the guest and/or make it
- *      tunable by the guest.  If we get more outstanding requests at a time
- *      than this we will get EAGAIN from io_submit which is communicated to
- *      the guest as an I/O error.
- */
-#define MAX_EVENTS 128
-
-struct qemu_laiocb {
-    BlockDriverAIOCB common;
-    struct qemu_laio_state *ctx;
-    struct iocb iocb;
-    ssize_t ret;
-    size_t nbytes;
-    QEMUIOVector *qiov;
-    bool is_read;
-    QLIST_ENTRY(qemu_laiocb) node;
-};
-
-struct qemu_laio_state {
-    io_context_t ctx;
-    EventNotifier e;
-    int count;
-};
-
-static inline ssize_t io_event_ret(struct io_event *ev)
-{
-    return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
-}
-
-/*
- * Completes an AIO request (calls the callback and frees the ACB).
- */
-static void qemu_laio_process_completion(struct qemu_laio_state *s,
-    struct qemu_laiocb *laiocb)
-{
-    int ret;
-
-    s->count--;
-
-    ret = laiocb->ret;
-    if (ret != -ECANCELED) {
-        if (ret == laiocb->nbytes) {
-            ret = 0;
-        } else if (ret >= 0) {
-            /* Short reads mean EOF, pad with zeros. */
-            if (laiocb->is_read) {
-                qemu_iovec_memset(laiocb->qiov, ret, 0,
-                    laiocb->qiov->size - ret);
-            } else {
-                ret = -EINVAL;
-            }
-        }
-
-        laiocb->common.cb(laiocb->common.opaque, ret);
-    }
-
-    qemu_aio_release(laiocb);
-}
-
-static void qemu_laio_completion_cb(EventNotifier *e)
-{
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
-
-    while (event_notifier_test_and_clear(&s->e)) {
-        struct io_event events[MAX_EVENTS];
-        struct timespec ts = { 0 };
-        int nevents, i;
-
-        do {
-            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
-        } while (nevents == -EINTR);
-
-        for (i = 0; i < nevents; i++) {
-            struct iocb *iocb = events[i].obj;
-            struct qemu_laiocb *laiocb =
-                    container_of(iocb, struct qemu_laiocb, iocb);
-
-            laiocb->ret = io_event_ret(&events[i]);
-            qemu_laio_process_completion(s, laiocb);
-        }
-    }
-}
-
-static int qemu_laio_flush_cb(EventNotifier *e)
-{
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
-
-    return (s->count > 0) ? 1 : 0;
-}
-
-static void laio_cancel(BlockDriverAIOCB *blockacb)
-{
-    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
-    struct io_event event;
-    int ret;
-
-    if (laiocb->ret != -EINPROGRESS)
-        return;
-
-    /*
-     * Note that as of Linux 2.6.31 neither the block device code nor any
-     * filesystem implements cancellation of AIO request.
-     * Thus the polling loop below is the normal code path.
-     */
-    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
-    if (ret == 0) {
-        laiocb->ret = -ECANCELED;
-        return;
-    }
-
-    /*
-     * We have to wait for the iocb to finish.
-     *
-     * The only way to get the iocb status update is by polling the io context.
-     * We might be able to do this slightly more optimal by removing the
-     * O_NONBLOCK flag.
-     */
-    while (laiocb->ret == -EINPROGRESS) {
-        qemu_laio_completion_cb(&laiocb->ctx->e);
-    }
-}
-
-static AIOPool laio_pool = {
-    .aiocb_size         = sizeof(struct qemu_laiocb),
-    .cancel             = laio_cancel,
-};
-
-BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type)
-{
-    struct qemu_laio_state *s = aio_ctx;
-    struct qemu_laiocb *laiocb;
-    struct iocb *iocbs;
-    off_t offset = sector_num * 512;
-
-    laiocb = qemu_aio_get(&laio_pool, bs, cb, opaque);
-    laiocb->nbytes = nb_sectors * 512;
-    laiocb->ctx = s;
-    laiocb->ret = -EINPROGRESS;
-    laiocb->is_read = (type == QEMU_AIO_READ);
-    laiocb->qiov = qiov;
-
-    iocbs = &laiocb->iocb;
-
-    switch (type) {
-    case QEMU_AIO_WRITE:
-        io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
-	break;
-    case QEMU_AIO_READ:
-        io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
-	break;
-    /* Currently Linux kernel does not support other operations */
-    default:
-        fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
-                        __func__, type);
-        goto out_free_aiocb;
-    }
-    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
-    s->count++;
-
-    if (io_submit(s->ctx, 1, &iocbs) < 0)
-        goto out_dec_count;
-    return &laiocb->common;
-
-out_dec_count:
-    s->count--;
-out_free_aiocb:
-    qemu_aio_release(laiocb);
-    return NULL;
-}
-
-void *laio_init(void)
-{
-    struct qemu_laio_state *s;
-
-    s = g_malloc0(sizeof(*s));
-    if (event_notifier_init(&s->e, false) < 0) {
-        goto out_free_state;
-    }
-
-    if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
-        goto out_close_efd;
-    }
-
-    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
-                                qemu_laio_flush_cb);
-
-    return s;
-
-out_close_efd:
-    event_notifier_cleanup(&s->e);
-out_free_state:
-    g_free(s);
-    return NULL;
-}
-- 
cgit v1.2.3


From a27365265cc2fed1178bf25a205e8ee02a9c0caf Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 26 Oct 2012 11:43:58 +0200
Subject: raw-win32: implement native asynchronous I/O

With the new support for EventNotifiers in the AIO event loop, we
can hook a completion port to every opened file and use asynchronous
I/O on them.

Wine's support is extremely inefficient, also because it really does
the I/O synchronously on regular files. (!)  But it works, and it is
good to keep the Win32 and POSIX ports as similar as possible.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/Makefile.objs |   2 +-
 block/raw-aio.h     |  10 +++
 block/raw-win32.c   |  42 ++++++++--
 block/win32-aio.c   | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 274 insertions(+), 6 deletions(-)
 create mode 100644 block/win32-aio.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 771d3414d9..30ef6aec03 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o
+block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
diff --git a/block/raw-aio.h b/block/raw-aio.h
index b3bb07377e..e77f361148 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -35,4 +35,14 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
         BlockDriverCompletionFunc *cb, void *opaque, int type);
 #endif
 
+#ifdef _WIN32
+typedef struct QEMUWin32AIOState QEMUWin32AIOState;
+QEMUWin32AIOState *win32_aio_init(void);
+int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
+        QEMUWin32AIOState *aio, HANDLE hfile,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type);
+#endif
+
 #endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-win32.c b/block/raw-win32.c
index ffd86e3f38..0c05c58c5a 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -36,6 +36,8 @@
 #define FTYPE_CD     1
 #define FTYPE_HARDDISK 2
 
+static QEMUWin32AIOState *aio;
+
 typedef struct RawWin32AIOData {
     BlockDriverState *bs;
     HANDLE hfile;
@@ -50,6 +52,7 @@ typedef struct BDRVRawState {
     HANDLE hfile;
     int type;
     char drive_path[16]; /* format: "d:\" */
+    QEMUWin32AIOState *aio;
 } BDRVRawState;
 
 /*
@@ -208,6 +211,9 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
     }
 
     *overlapped = FILE_ATTRIBUTE_NORMAL;
+    if (flags & BDRV_O_NATIVE_AIO) {
+        *overlapped |= FILE_FLAG_OVERLAPPED;
+    }
     if (flags & BDRV_O_NOCACHE) {
         *overlapped |= FILE_FLAG_NO_BUFFERING;
     }
@@ -222,6 +228,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
     s->type = FTYPE_FILE;
 
     raw_parse_flags(flags, &access_flags, &overlapped);
+    
+    if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
+        aio = win32_aio_init();
+        if (aio == NULL) {
+            return -EINVAL;
+        }
+    }
 
     s->hfile = CreateFile(filename, access_flags,
                           FILE_SHARE_READ, NULL,
@@ -231,7 +244,16 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 
         if (err == ERROR_ACCESS_DENIED)
             return -EACCES;
-        return -1;
+        return -EINVAL;
+    }
+
+    if (flags & BDRV_O_NATIVE_AIO) {
+        int ret = win32_aio_attach(aio, s->hfile);
+        if (ret < 0) {
+            CloseHandle(s->hfile);
+            return ret;
+        }
+        s->aio = aio;
     }
     return 0;
 }
@@ -241,8 +263,13 @@ static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
                          BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
-                       cb, opaque, QEMU_AIO_READ);
+    if (s->aio) {
+        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
+                                nb_sectors, cb, opaque, QEMU_AIO_READ); 
+    } else {
+        return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+                           cb, opaque, QEMU_AIO_READ);
+    }
 }
 
 static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
@@ -250,8 +277,13 @@ static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
                           BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
-                       cb, opaque, QEMU_AIO_WRITE);
+    if (s->aio) {
+        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
+                                nb_sectors, cb, opaque, QEMU_AIO_WRITE); 
+    } else {
+        return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+                           cb, opaque, QEMU_AIO_WRITE);
+    }
 }
 
 static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
diff --git a/block/win32-aio.c b/block/win32-aio.c
new file mode 100644
index 0000000000..c34dc73b6c
--- /dev/null
+++ b/block/win32-aio.c
@@ -0,0 +1,226 @@
+/*
+ * Block driver for RAW files (win32)
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu-timer.h"
+#include "block_int.h"
+#include "module.h"
+#include "qemu-common.h"
+#include "qemu-aio.h"
+#include "raw-aio.h"
+#include "event_notifier.h"
+#include <windows.h>
+#include <winioctl.h>
+
+#define FTYPE_FILE 0
+#define FTYPE_CD     1
+#define FTYPE_HARDDISK 2
+
+struct QEMUWin32AIOState {
+    HANDLE hIOCP;
+    EventNotifier e;
+    int count;
+};
+
+typedef struct QEMUWin32AIOCB {
+    BlockDriverAIOCB common;
+    struct QEMUWin32AIOState *ctx;
+    int nbytes;
+    OVERLAPPED ov;
+    QEMUIOVector *qiov;
+    void *buf;
+    bool is_read;
+    bool is_linear;
+} QEMUWin32AIOCB;
+
+/*
+ * Completes an AIO request (calls the callback and frees the ACB).
+ */
+static void win32_aio_process_completion(QEMUWin32AIOState *s,
+    QEMUWin32AIOCB *waiocb, DWORD count)
+{
+    int ret;
+    s->count--;
+
+    if (waiocb->ov.Internal != 0) {
+        ret = -EIO;
+    } else {
+        ret = 0;
+        if (count < waiocb->nbytes) {
+            /* Short reads mean EOF, pad with zeros. */
+            if (waiocb->is_read) {
+                qemu_iovec_memset(waiocb->qiov, count, 0,
+                    waiocb->qiov->size - count);
+            } else {
+                ret = -EINVAL;
+            }
+       }
+    }
+
+    if (!waiocb->is_linear) {
+        if (ret == 0 && waiocb->is_read) {
+            QEMUIOVector *qiov = waiocb->qiov;
+            char *p = waiocb->buf;
+            int i;
+
+            for (i = 0; i < qiov->niov; ++i) {
+                memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
+                p += qiov->iov[i].iov_len;
+            }
+            g_free(waiocb->buf);
+        }
+    }
+
+
+    waiocb->common.cb(waiocb->common.opaque, ret);
+    qemu_aio_release(waiocb);
+}
+
+static void win32_aio_completion_cb(EventNotifier *e)
+{
+    QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
+    DWORD count;
+    ULONG_PTR key;
+    OVERLAPPED *ov;
+
+    event_notifier_test_and_clear(&s->e);
+    while (GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) {
+        QEMUWin32AIOCB *waiocb = container_of(ov, QEMUWin32AIOCB, ov);
+
+        win32_aio_process_completion(s, waiocb, count);
+    }
+}
+
+static int win32_aio_flush_cb(EventNotifier *e)
+{
+    QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
+
+    return (s->count > 0) ? 1 : 0;
+}
+
+static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
+
+    /*
+     * CancelIoEx is only supported in Vista and newer.  For now, just
+     * wait for completion.
+     */
+    while (!HasOverlappedIoCompleted(&waiocb->ov)) {
+        qemu_aio_wait();
+    }
+}
+
+static AIOPool win32_aio_pool = {
+    .aiocb_size         = sizeof(QEMUWin32AIOCB),
+    .cancel             = win32_aio_cancel,
+};
+
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
+        QEMUWin32AIOState *aio, HANDLE hfile,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    struct QEMUWin32AIOCB *waiocb;
+    uint64_t offset = sector_num * 512;
+    DWORD rc;
+
+    waiocb = qemu_aio_get(&win32_aio_pool, bs, cb, opaque);
+    waiocb->nbytes = nb_sectors * 512;
+    waiocb->qiov = qiov;
+    waiocb->is_read = (type == QEMU_AIO_READ);
+
+    if (qiov->niov > 1) {
+        waiocb->buf = qemu_blockalign(bs, qiov->size);
+        if (type & QEMU_AIO_WRITE) {
+            char *p = waiocb->buf;
+            int i;
+
+            for (i = 0; i < qiov->niov; ++i) {
+                memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
+                p += qiov->iov[i].iov_len;
+            }
+        }
+        waiocb->is_linear = false;
+    } else {
+        waiocb->buf = qiov->iov[0].iov_base;
+        waiocb->is_linear = true;
+    }
+
+    waiocb->ov = (OVERLAPPED) {
+        .Offset = (DWORD) offset,
+        .OffsetHigh = (DWORD) (offset >> 32),
+        .hEvent = event_notifier_get_handle(&aio->e)
+    };
+    aio->count++;
+
+    if (type & QEMU_AIO_READ) {
+        rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
+    } else {
+        rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
+    }
+    if(rc == 0 && GetLastError() != ERROR_IO_PENDING) {
+        goto out_dec_count;
+    }
+    return &waiocb->common;
+
+out_dec_count:
+    aio->count--;
+    qemu_aio_release(waiocb);
+    return NULL;
+}
+
+int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
+{
+    if (CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0) == NULL) {
+        return -EINVAL;
+    } else {
+        return 0;
+    }
+}
+
+QEMUWin32AIOState *win32_aio_init(void)
+{
+    QEMUWin32AIOState *s;
+
+    s = g_malloc0(sizeof(*s));
+    if (event_notifier_init(&s->e, false) < 0) {
+        goto out_free_state;
+    }
+
+    s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
+    if (s->hIOCP == NULL) {
+        goto out_close_efd;
+    }
+
+    qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb,
+                                win32_aio_flush_cb);
+
+    return s;
+
+out_close_efd:
+    event_notifier_cleanup(&s->e);
+out_free_state:
+    g_free(s);
+    return NULL;
+}
-- 
cgit v1.2.3