diff options
515 files changed, 20866 insertions, 4887 deletions
diff --git a/.gitignore b/.gitignore index 75bceb9975..88a80ff4a5 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,8 @@ /*-darwin-user /*-linux-user /*-bsd-user +/ivshmem-client +/ivshmem-server /libdis* /libuser /linux-headers/asm diff --git a/MAINTAINERS b/MAINTAINERS index 314411332c..bb1f3e4062 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -62,14 +62,22 @@ Guest CPU cores (TCG): ---------------------- Overall L: qemu-devel@nongnu.org -S: Odd fixes +M: Paolo Bonzini <pbonzini@redhat.com> +M: Peter Crosthwaite <crosthwaite.peter@gmail.com> +M: Richard Henderson <rth@twiddle.net> +S: Maintained F: cpu-exec.c +F: cpu-exec-common.c +F: cpus.c F: cputlb.c +F: exec.c F: softmmu_template.h -F: translate-all.c -F: include/exec/cpu_ldst.h -F: include/exec/cpu_ldst_template.h +F: translate-all.* +F: translate-common.c +F: include/exec/cpu*.h +F: include/exec/exec-all.h F: include/exec/helper*.h +F: include/exec/tb-hash.h Alpha M: Richard Henderson <rth@twiddle.net> @@ -81,6 +89,7 @@ F: disas/alpha.c ARM M: Peter Maydell <peter.maydell@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: target-arm/ F: hw/arm/ @@ -216,6 +225,7 @@ F: */kvm.* ARM M: Peter Maydell <peter.maydell@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: target-arm/kvm.c @@ -235,9 +245,14 @@ M: Cornelia Huck <cornelia.huck@de.ibm.com> M: Alexander Graf <agraf@suse.de> S: Maintained F: target-s390x/kvm.c +F: target-s390x/ioinst.[ch] +F: target-s390x/machine.c F: hw/intc/s390_flic.c F: hw/intc/s390_flic_kvm.c F: include/hw/s390x/s390_flic.h +F: gdb-xml/s390*.xml +T: git git://github.com/cohuck/qemu.git s390-next +T: git git://github.com/borntraeger/qemu.git s390-next X86 M: Paolo Bonzini <pbonzini@redhat.com> @@ -287,6 +302,7 @@ ARM Machines ------------ Allwinner-a10 M: Beniamino Galvani <b.galvani@gmail.com> +L: qemu-arm@nongnu.org S: Maintained F: hw/*/allwinner* F: include/hw/*/allwinner* @@ -294,6 +310,7 @@ F: hw/arm/cubieboard.c ARM PrimeCell M: Peter Maydell <peter.maydell@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: hw/char/pl011.c F: hw/display/pl110* @@ -308,6 +325,7 @@ F: include/hw/arm/primecell.h ARM cores M: Peter Maydell <peter.maydell@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: hw/intc/arm* F: hw/intc/gic_internal.h @@ -327,54 +345,64 @@ M: Evgeny Voevodin <e.voevodin@samsung.com> M: Maksim Kozlov <m.kozlov@samsung.com> M: Igor Mitsyanko <i.mitsyanko@gmail.com> M: Dmitry Solodkiy <d.solodkiy@samsung.com> +L: qemu-arm@nongnu.org S: Maintained F: hw/*/exynos* Calxeda Highbank M: Rob Herring <robh@kernel.org> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/highbank.c F: hw/net/xgmac.c Canon DIGIC M: Antony Pavlov <antonynpavlov@gmail.com> +L: qemu-arm@nongnu.org S: Maintained F: include/hw/arm/digic.h F: hw/*/digic* Gumstix L: qemu-devel@nongnu.org +L: qemu-arm@nongnu.org S: Orphan F: hw/arm/gumstix.c i.MX31 M: Peter Chubb <peter.chubb@nicta.com.au> +L: qemu-arm@nongnu.org S: Odd fixes F: hw/*/imx* F: hw/arm/kzm.c Integrator CP M: Peter Maydell <peter.maydell@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/integratorcp.c Musicpal M: Jan Kiszka <jan.kiszka@web.de> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/musicpal.c nSeries M: Andrzej Zaborowski <balrogg@gmail.com> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/nseries.c Palm M: Andrzej Zaborowski <balrogg@gmail.com> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/palm.c Real View M: Peter Maydell <peter.maydell@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/realview* F: hw/intc/realview_gic.c @@ -382,6 +410,7 @@ F: include/hw/intc/realview_gic.h PXA2XX M: Andrzej Zaborowski <balrogg@gmail.com> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/mainstone.c F: hw/arm/spitz.c @@ -391,17 +420,20 @@ F: hw/*/pxa2xx* Stellaris M: Peter Maydell <peter.maydell@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: hw/*/stellaris* Versatile PB M: Peter Maydell <peter.maydell@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: hw/*/versatile* Xilinx Zynq M: Alistair Francis <alistair.francis@xilinx.com> M: Peter Crosthwaite <crosthwaite.peter@gmail.com> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/xilinx_zynq.c F: hw/misc/zynq_slcr.c @@ -411,6 +443,7 @@ F: hw/ssi/xilinx_spips.c Xilinx ZynqMP M: Alistair Francis <alistair.francis@xilinx.com> M: Peter Crosthwaite <crosthwaite.peter@gmail.com> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/xlnx-zynqmp.c F: hw/arm/xlnx-ep108.c @@ -419,6 +452,7 @@ F: include/hw/arm/xlnx-zynqmp.h ARM ACPI Subsystem M: Shannon Zhao <zhaoshenglong@huawei.com> M: Shannon Zhao <shannon.zhao@linaro.org> +L: qemu-arm@nongnu.org S: Maintained F: hw/arm/virt-acpi-build.c F: include/hw/arm/virt-acpi-build.h @@ -616,15 +650,13 @@ M: Christian Borntraeger <borntraeger@de.ibm.com> M: Alexander Graf <agraf@suse.de> S: Supported F: hw/char/sclp*.[hc] -F: hw/s390x/s390-virtio-ccw.c -F: hw/s390x/css.[hc] -F: hw/s390x/sclp*.[hc] -F: hw/s390x/ipl*.[hc] -F: hw/s390x/*pci*.[hc] -F: hw/s390x/s390-skeys*.c +F: hw/s390x/ +X: hw/s390x/s390-virtio-bus.[ch] F: include/hw/s390x/ F: pc-bios/s390-ccw/ -T: git git://github.com/cohuck/qemu virtio-ccw-upstr +F: hw/watchdog/wdt_diag288.c +T: git git://github.com/cohuck/qemu.git s390-next +T: git git://github.com/borntraeger/qemu.git s390-next UniCore32 Machines ------------- @@ -851,7 +883,8 @@ M: Cornelia Huck <cornelia.huck@de.ibm.com> M: Christian Borntraeger <borntraeger@de.ibm.com> S: Supported F: hw/s390x/virtio-ccw.[hc] -T: git git://github.com/cohuck/qemu virtio-ccw-upstr +T: git git://github.com/cohuck/qemu.git s390-next +T: git git://github.com/borntraeger/qemu.git s390-next virtio-input M: Gerd Hoffmann <kraxel@redhat.com> @@ -1017,6 +1050,7 @@ S: Supported F: include/exec/ioport.h F: ioport.c F: include/exec/memory.h +F: include/exec/ram_addr.h F: memory.c F: include/exec/memory-internal.h F: exec.c @@ -1139,6 +1173,8 @@ F: include/qom/ X: include/qom/cpu.h F: qom/ X: qom/cpu.c +F: tests/check-qom-interface.c +F: tests/check-qom-proplist.c F: tests/qom-test.c QMP @@ -1235,6 +1271,7 @@ AArch64 target M: Claudio Fontana <claudio.fontana@huawei.com> M: Claudio Fontana <claudio.fontana@gmail.com> S: Maintained +L: qemu-arm@nongnu.org F: tcg/aarch64/ F: disas/arm-a64.cc F: disas/libvixl/ @@ -1242,6 +1279,7 @@ F: disas/libvixl/ ARM target M: Andrzej Zaborowski <balrogg@gmail.com> S: Maintained +L: qemu-arm@nongnu.org F: tcg/arm/ F: disas/arm.c @@ -440,7 +440,7 @@ endif install: all $(if $(BUILD_DOCS),install-doc) \ install-datadir install-localstatedir ifneq ($(TOOLS),) - $(call install-prog,$(TOOLS),$(DESTDIR)$(bindir)) + $(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir)) endif ifneq ($(CONFIG_MODULES),) $(INSTALL_DIR) "$(DESTDIR)$(qemu_moddir)" diff --git a/Makefile.objs b/Makefile.objs index fe02ee2cf4..77be052dde 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -54,6 +54,8 @@ common-obj-y += audio/ common-obj-y += hw/ common-obj-y += accel.o +common-obj-y += replay/ + common-obj-y += ui/ common-obj-y += bt-host.o bt-vhci.o bt-host.o-cflags := $(BLUEZ_CFLAGS) @@ -1 +1 @@ -2.4.50 +2.4.92 diff --git a/aio-posix.c b/aio-posix.c index 0467f23a63..482b316502 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -17,6 +17,9 @@ #include "block/block.h" #include "qemu/queue.h" #include "qemu/sockets.h" +#ifdef CONFIG_EPOLL +#include <sys/epoll.h> +#endif struct AioHandler { @@ -29,6 +32,162 @@ struct AioHandler QLIST_ENTRY(AioHandler) node; }; +#ifdef CONFIG_EPOLL + +/* The fd number threashold to switch to epoll */ +#define EPOLL_ENABLE_THRESHOLD 64 + +static void aio_epoll_disable(AioContext *ctx) +{ + ctx->epoll_available = false; + if (!ctx->epoll_enabled) { + return; + } + ctx->epoll_enabled = false; + close(ctx->epollfd); +} + +static inline int epoll_events_from_pfd(int pfd_events) +{ + return (pfd_events & G_IO_IN ? EPOLLIN : 0) | + (pfd_events & G_IO_OUT ? EPOLLOUT : 0) | + (pfd_events & G_IO_HUP ? EPOLLHUP : 0) | + (pfd_events & G_IO_ERR ? EPOLLERR : 0); +} + +static bool aio_epoll_try_enable(AioContext *ctx) +{ + AioHandler *node; + struct epoll_event event; + + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + int r; + if (node->deleted || !node->pfd.events) { + continue; + } + event.events = epoll_events_from_pfd(node->pfd.events); + event.data.ptr = node; + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event); + if (r) { + return false; + } + } + ctx->epoll_enabled = true; + return true; +} + +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new) +{ + struct epoll_event event; + int r; + + if (!ctx->epoll_enabled) { + return; + } + if (!node->pfd.events) { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } else { + event.data.ptr = node; + event.events = epoll_events_from_pfd(node->pfd.events); + if (is_new) { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } else { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } + } +} + +static int aio_epoll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + AioHandler *node; + int i, ret = 0; + struct epoll_event events[128]; + + assert(npfd == 1); + assert(pfds[0].fd == ctx->epollfd); + if (timeout > 0) { + ret = qemu_poll_ns(pfds, npfd, timeout); + } + if (timeout <= 0 || ret > 0) { + ret = epoll_wait(ctx->epollfd, events, + sizeof(events) / sizeof(events[0]), + timeout); + if (ret <= 0) { + goto out; + } + for (i = 0; i < ret; i++) { + int ev = events[i].events; + node = events[i].data.ptr; + node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) | + (ev & EPOLLOUT ? G_IO_OUT : 0) | + (ev & EPOLLHUP ? G_IO_HUP : 0) | + (ev & EPOLLERR ? G_IO_ERR : 0); + } + } +out: + return ret; +} + +static bool aio_epoll_enabled(AioContext *ctx) +{ + /* Fall back to ppoll when external clients are disabled. */ + return !aio_external_disabled(ctx) && ctx->epoll_enabled; +} + +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + if (!ctx->epoll_available) { + return false; + } + if (aio_epoll_enabled(ctx)) { + return true; + } + if (npfd >= EPOLL_ENABLE_THRESHOLD) { + if (aio_epoll_try_enable(ctx)) { + return true; + } else { + aio_epoll_disable(ctx); + } + } + return false; +} + +#else + +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new) +{ +} + +static int aio_epoll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + assert(false); +} + +static bool aio_epoll_enabled(AioContext *ctx) +{ + return false; +} + +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + return false; +} + +#endif + static AioHandler *find_aio_handler(AioContext *ctx, int fd) { AioHandler *node; @@ -50,6 +209,8 @@ void aio_set_fd_handler(AioContext *ctx, void *opaque) { AioHandler *node; + bool is_new = false; + bool deleted = false; node = find_aio_handler(ctx, fd); @@ -68,7 +229,7 @@ void aio_set_fd_handler(AioContext *ctx, * releasing the walking_handlers lock. */ QLIST_REMOVE(node, node); - g_free(node); + deleted = true; } } } else { @@ -79,6 +240,7 @@ void aio_set_fd_handler(AioContext *ctx, QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); g_source_add_poll(&ctx->source, &node->pfd); + is_new = true; } /* Update handler with latest information */ node->io_read = io_read; @@ -90,7 +252,11 @@ void aio_set_fd_handler(AioContext *ctx, node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0); } + aio_epoll_update(ctx, node, is_new); aio_notify(ctx); + if (deleted) { + g_free(node); + } } void aio_set_event_notifier(AioContext *ctx, @@ -262,6 +428,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* fill pollfds */ QLIST_FOREACH(node, &ctx->aio_handlers, node) { if (!node->deleted && node->pfd.events + && !aio_epoll_enabled(ctx) && aio_node_check(ctx, node->is_external)) { add_pollfd(node); } @@ -273,7 +440,17 @@ bool aio_poll(AioContext *ctx, bool blocking) if (timeout) { aio_context_release(ctx); } - ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout); + if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) { + AioHandler epoll_handler; + + epoll_handler.pfd.fd = ctx->epollfd; + epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR; + npfd = 0; + add_pollfd(&epoll_handler); + ret = aio_epoll(ctx, pollfds, npfd, timeout); + } else { + ret = qemu_poll_ns(pollfds, npfd, timeout); + } if (blocking) { atomic_sub(&ctx->notify_me, 2); } @@ -302,3 +479,16 @@ bool aio_poll(AioContext *ctx, bool blocking) return progress; } + +void aio_context_setup(AioContext *ctx, Error **errp) +{ +#ifdef CONFIG_EPOLL + assert(!ctx->epollfd); + ctx->epollfd = epoll_create1(EPOLL_CLOEXEC); + if (ctx->epollfd == -1) { + ctx->epoll_available = false; + } else { + ctx->epoll_available = true; + } +#endif +} diff --git a/aio-win32.c b/aio-win32.c index 43c4c79a75..cdc445608b 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -369,3 +369,7 @@ bool aio_poll(AioContext *ctx, bool blocking) aio_context_release(ctx); return progress; } + +void aio_context_setup(AioContext *ctx, Error **errp) +{ +} @@ -59,6 +59,11 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) return bh; } +void aio_bh_call(QEMUBH *bh) +{ + bh->cb(bh->opaque); +} + /* Multiple occurrences of aio_bh_poll cannot be called concurrently */ int aio_bh_poll(AioContext *ctx) { @@ -84,7 +89,7 @@ int aio_bh_poll(AioContext *ctx) ret = 1; } bh->idle = 0; - bh->cb(bh->opaque); + aio_bh_call(bh); } } @@ -320,12 +325,18 @@ AioContext *aio_context_new(Error **errp) { int ret; AioContext *ctx; + Error *local_err = NULL; + ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); + aio_context_setup(ctx, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto fail; + } ret = event_notifier_init(&ctx->notifier, false); if (ret < 0) { - g_source_destroy(&ctx->source); error_setg_errno(errp, -ret, "Failed to initialize event notifier"); - return NULL; + goto fail; } g_source_set_can_recurse(&ctx->source, true); aio_set_event_notifier(ctx, &ctx->notifier, @@ -340,6 +351,9 @@ AioContext *aio_context_new(Error **errp) ctx->notify_dummy_bh = aio_bh_new(ctx, notify_dummy_bh, NULL); return ctx; +fail: + g_source_destroy(&ctx->source); + return NULL; } void aio_context_ref(AioContext *ctx) diff --git a/backends/hostmem.c b/backends/hostmem.c index 41ba2af52f..1b4eb45817 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -313,9 +313,11 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) assert(maxnode <= MAX_NODES); if (mbind(ptr, sz, backend->policy, maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { - error_setg_errno(errp, errno, - "cannot bind memory to host NUMA nodes"); - return; + if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { + error_setg_errno(errp, errno, + "cannot bind memory to host NUMA nodes"); + return; + } } #endif /* Preallocate memory after the NUMA policy has been instantiated. @@ -36,6 +36,17 @@ static QEMUBalloonEvent *balloon_event_fn; static QEMUBalloonStatus *balloon_stat_fn; static void *balloon_opaque; +static bool balloon_inhibited; + +bool qemu_balloon_is_inhibited(void) +{ + return balloon_inhibited; +} + +void qemu_balloon_inhibit(bool state) +{ + balloon_inhibited = state; +} static bool have_balloon(Error **errp) { @@ -73,8 +73,7 @@ struct BdrvDirtyBitmap { #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ -static QTAILQ_HEAD(, BlockDriverState) bdrv_states = - QTAILQ_HEAD_INITIALIZER(bdrv_states); +struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); @@ -1394,6 +1393,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, BlockDriverState *bs; BlockDriver *drv = NULL; const char *drvname; + const char *backing; Error *local_err = NULL; int snapshot_flags = 0; @@ -1461,6 +1461,12 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, assert(drvname || !(flags & BDRV_O_PROTOCOL)); + backing = qdict_get_try_str(options, "backing"); + if (backing && *backing == '\0') { + flags |= BDRV_O_NO_BACKING; + qdict_del(options, "backing"); + } + bs->open_flags = flags; bs->options = options; options = qdict_clone_shallow(options); @@ -1795,8 +1801,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, ret = bdrv_flush(reopen_state->bs); if (ret) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", - strerror(-ret)); + error_setg_errno(errp, -ret, "Error flushing drive"); goto error; } @@ -1901,7 +1906,7 @@ void bdrv_close(BlockDriverState *bs) } /* Disable I/O limits and drain all pending throttled requests */ - if (bs->io_limits_enabled) { + if (bs->throttle_state) { bdrv_io_limits_disable(bs); } @@ -2683,12 +2688,12 @@ BlockDriverState *bdrv_lookup_bs(const char *device, blk = blk_by_name(device); if (blk) { - if (!blk_bs(blk)) { + bs = blk_bs(blk); + if (!bs) { error_setg(errp, "Device '%s' has no medium", device); - return NULL; } - return blk_bs(blk); + return bs; } } @@ -3399,10 +3404,25 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); } -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + if (!out) { + hbitmap_reset_all(bitmap->bitmap); + } else { + HBitmap *backup = bitmap->bitmap; + bitmap->bitmap = hbitmap_alloc(bitmap->size, + hbitmap_granularity(backup)); + *out = backup; + } +} + +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) { + HBitmap *tmp = bitmap->bitmap; assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_reset_all(bitmap->bitmap); + bitmap->bitmap = in; + hbitmap_free(tmp); } void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, @@ -3706,7 +3726,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs) baf->detach_aio_context(baf->opaque); } - if (bs->io_limits_enabled) { + if (bs->throttle_state) { throttle_timers_detach_aio_context(&bs->throttle_timers); } if (bs->drv->bdrv_detach_aio_context) { @@ -3742,7 +3762,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, if (bs->drv->bdrv_attach_aio_context) { bs->drv->bdrv_attach_aio_context(bs, new_context); } - if (bs->io_limits_enabled) { + if (bs->throttle_state) { throttle_timers_attach_aio_context(&bs->throttle_timers, new_context); } diff --git a/block/accounting.c b/block/accounting.c index a423560206..185025ec1e 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -2,6 +2,7 @@ * QEMU System Emulator block accounting * * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,6 +26,54 @@ #include "block/accounting.h" #include "block/block_int.h" #include "qemu/timer.h" +#include "sysemu/qtest.h" + +static QEMUClockType clock_type = QEMU_CLOCK_REALTIME; +static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000; + +void block_acct_init(BlockAcctStats *stats, bool account_invalid, + bool account_failed) +{ + stats->account_invalid = account_invalid; + stats->account_failed = account_failed; + + if (qtest_enabled()) { + clock_type = QEMU_CLOCK_VIRTUAL; + } +} + +void block_acct_cleanup(BlockAcctStats *stats) +{ + BlockAcctTimedStats *s, *next; + QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) { + g_free(s); + } +} + +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length) +{ + BlockAcctTimedStats *s; + unsigned i; + + s = g_new0(BlockAcctTimedStats, 1); + s->interval_length = interval_length; + QSLIST_INSERT_HEAD(&stats->intervals, s, entries); + + for (i = 0; i < BLOCK_MAX_IOTYPE; i++) { + timed_average_init(&s->latency[i], clock_type, + (uint64_t) interval_length * NANOSECONDS_PER_SECOND); + } +} + +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s) +{ + if (s == NULL) { + return QSLIST_FIRST(&stats->intervals); + } else { + return QSLIST_NEXT(s, entries); + } +} void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type) @@ -32,20 +81,71 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, assert(type < BLOCK_MAX_IOTYPE); cookie->bytes = bytes; - cookie->start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + cookie->start_time_ns = qemu_clock_get_ns(clock_type); cookie->type = type; } void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) { + BlockAcctTimedStats *s; + int64_t time_ns = qemu_clock_get_ns(clock_type); + int64_t latency_ns = time_ns - cookie->start_time_ns; + + if (qtest_enabled()) { + latency_ns = qtest_latency_ns; + } + assert(cookie->type < BLOCK_MAX_IOTYPE); stats->nr_bytes[cookie->type] += cookie->bytes; stats->nr_ops[cookie->type]++; - stats->total_time_ns[cookie->type] += - qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - cookie->start_time_ns; + stats->total_time_ns[cookie->type] += latency_ns; + stats->last_access_time_ns = time_ns; + + QSLIST_FOREACH(s, &stats->intervals, entries) { + timed_average_account(&s->latency[cookie->type], latency_ns); + } +} + +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) +{ + assert(cookie->type < BLOCK_MAX_IOTYPE); + + stats->failed_ops[cookie->type]++; + + if (stats->account_failed) { + BlockAcctTimedStats *s; + int64_t time_ns = qemu_clock_get_ns(clock_type); + int64_t latency_ns = time_ns - cookie->start_time_ns; + + if (qtest_enabled()) { + latency_ns = qtest_latency_ns; + } + + stats->total_time_ns[cookie->type] += latency_ns; + stats->last_access_time_ns = time_ns; + + QSLIST_FOREACH(s, &stats->intervals, entries) { + timed_average_account(&s->latency[cookie->type], latency_ns); + } + } } +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type) +{ + assert(type < BLOCK_MAX_IOTYPE); + + /* block_acct_done() and block_acct_failed() update + * total_time_ns[], but this one does not. The reason is that + * invalid requests are accounted during their submission, + * therefore there's no actual I/O involved. */ + + stats->invalid_ops[type]++; + + if (stats->account_invalid) { + stats->last_access_time_ns = qemu_clock_get_ns(clock_type); + } +} void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, int num_requests) @@ -53,3 +153,20 @@ void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, assert(type < BLOCK_MAX_IOTYPE); stats->merged[type] += num_requests; } + +int64_t block_acct_idle_time_ns(BlockAcctStats *stats) +{ + return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns; +} + +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type) +{ + uint64_t sum, elapsed; + + assert(type < BLOCK_MAX_IOTYPE); + + sum = timed_average_sum(&stats->latency[type], &elapsed); + + return (double) sum / elapsed; +} diff --git a/block/backup.c b/block/backup.c index ec01db8eff..705bb77661 100644 --- a/block/backup.c +++ b/block/backup.c @@ -132,7 +132,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, qemu_iovec_init_external(&bounce_qiov, &iov, 1); if (is_write_notifier) { - ret = bdrv_co_no_copy_on_readv(bs, + ret = bdrv_co_readv_no_serialising(bs, start * BACKUP_SECTORS_PER_CLUSTER, n, &bounce_qiov); } else { @@ -221,11 +221,45 @@ static void backup_iostatus_reset(BlockJob *job) } } +static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) +{ + BdrvDirtyBitmap *bm; + BlockDriverState *bs = job->common.bs; + + if (ret < 0 || block_job_is_cancelled(&job->common)) { + /* Merge the successor back into the parent, delete nothing. */ + bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL); + assert(bm); + } else { + /* Everything is fine, delete this bitmap and install the backup. */ + bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL); + assert(bm); + } +} + +static void backup_commit(BlockJob *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + if (s->sync_bitmap) { + backup_cleanup_sync_bitmap(s, 0); + } +} + +static void backup_abort(BlockJob *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + if (s->sync_bitmap) { + backup_cleanup_sync_bitmap(s, -1); + } +} + static const BlockJobDriver backup_job_driver = { .instance_size = sizeof(BackupBlockJob), .job_type = BLOCK_JOB_TYPE_BACKUP, .set_speed = backup_set_speed, .iostatus_reset = backup_iostatus_reset, + .commit = backup_commit, + .abort = backup_abort, }; static BlockErrorAction backup_error_action(BackupBlockJob *job, @@ -441,19 +475,6 @@ static void coroutine_fn backup_run(void *opaque) /* wait until pending backup_do_cow() calls have completed */ qemu_co_rwlock_wrlock(&job->flush_rwlock); qemu_co_rwlock_unlock(&job->flush_rwlock); - - if (job->sync_bitmap) { - BdrvDirtyBitmap *bm; - if (ret < 0 || block_job_is_cancelled(&job->common)) { - /* Merge the successor back into the parent, delete nothing. */ - bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL); - assert(bm); - } else { - /* Everything is fine, delete this bitmap and install the backup. */ - bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL); - assert(bm); - } - } hbitmap_free(job->bitmap); if (target->blk) { @@ -472,7 +493,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockCompletionFunc *cb, void *opaque, - Error **errp) + BlockJobTxn *txn, Error **errp) { int64_t len; @@ -554,6 +575,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, sync_bitmap : NULL; job->common.len = len; job->common.co = qemu_coroutine_create(backup_run); + block_job_txn_add_job(txn, &job->common); qemu_coroutine_enter(job->common.co, job); return; diff --git a/block/blkdebug.c b/block/blkdebug.c index 6860a2ba2f..dee3a0edfc 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -30,6 +30,7 @@ #include "qapi/qmp/qdict.h" #include "qapi/qmp/qint.h" #include "qapi/qmp/qstring.h" +#include "sysemu/qtest.h" typedef struct BDRVBlkdebugState { int state; @@ -583,9 +584,13 @@ static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) remove_rule(rule); QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); - printf("blkdebug: Suspended request '%s'\n", r.tag); + if (!qtest_enabled()) { + printf("blkdebug: Suspended request '%s'\n", r.tag); + } qemu_coroutine_yield(); - printf("blkdebug: Resuming request '%s'\n", r.tag); + if (!qtest_enabled()) { + printf("blkdebug: Resuming request '%s'\n", r.tag); + } QLIST_REMOVE(&r, next); g_free(r.tag); diff --git a/block/block-backend.c b/block/block-backend.c index 19fdaaec1a..36ccc9e616 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -176,6 +176,7 @@ static void blk_delete(BlockBackend *blk) } g_free(blk->name); drive_info_del(blk->legacy_dinfo); + block_acct_cleanup(&blk->stats); g_free(blk); } @@ -189,6 +190,11 @@ static void drive_info_del(DriveInfo *dinfo) g_free(dinfo); } +int blk_get_refcnt(BlockBackend *blk) +{ + return blk ? blk->refcnt : 0; +} + /* * Increment @blk's reference count. * @blk must not be null. @@ -334,6 +340,18 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk) } /* + * Disassociates the currently associated BlockDriverState from @blk. + */ +void blk_remove_bs(BlockBackend *blk) +{ + blk_update_root_state(blk); + + blk->bs->blk = NULL; + bdrv_unref(blk->bs); + blk->bs = NULL; +} + +/* * Associates a new BlockDriverState with @blk. */ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) @@ -417,18 +435,15 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void blk_dev_change_media_cb(BlockBackend *blk, bool load) { if (blk->dev_ops && blk->dev_ops->change_media_cb) { - bool tray_was_closed = !blk_dev_is_tray_open(blk); + bool tray_was_open, tray_is_open; + tray_was_open = blk_dev_is_tray_open(blk); blk->dev_ops->change_media_cb(blk->dev_opaque, load); - if (tray_was_closed) { - /* tray open */ - qapi_event_send_device_tray_moved(blk_name(blk), - true, &error_abort); - } - if (load) { - /* tray close */ - qapi_event_send_device_tray_moved(blk_name(blk), - false, &error_abort); + tray_is_open = blk_dev_is_tray_open(blk); + + if (tray_was_open != tray_is_open) { + qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open, + &error_abort); } } } @@ -627,8 +642,9 @@ static void error_callback_bh(void *opaque) qemu_aio_unref(acb); } -static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb, - void *opaque, int ret) +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret) { struct BlockBackendAIOCB *acb; QEMUBH *bh; @@ -650,7 +666,7 @@ BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags, @@ -710,7 +726,7 @@ BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque); @@ -722,7 +738,7 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque); @@ -732,7 +748,7 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { if (!blk_is_available(blk)) { - return abort_aio_request(blk, cb, opaque, -ENOMEDIUM); + return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); } return bdrv_aio_flush(blk->bs, cb, opaque); @@ -744,7 +760,7 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque); @@ -787,7 +803,7 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { if (!blk_is_available(blk)) { - return abort_aio_request(blk, cb, opaque, -ENOMEDIUM); + return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); } return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque); @@ -1227,6 +1243,33 @@ void blk_update_root_state(BlockBackend *blk) } } +/* + * Applies the information in the root state to the given BlockDriverState. This + * does not include the flags which have to be specified for bdrv_open(), use + * blk_get_open_flags_from_root_state() to inquire them. + */ +void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs) +{ + bs->detect_zeroes = blk->root_state.detect_zeroes; + if (blk->root_state.throttle_group) { + bdrv_io_limits_enable(bs, blk->root_state.throttle_group); + } +} + +/* + * Returns the flags to be used for bdrv_open() of a BlockDriverState which is + * supposed to inherit the root state. + */ +int blk_get_open_flags_from_root_state(BlockBackend *blk) +{ + int bs_flags; + + bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR; + bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; + + return bs_flags; +} + BlockBackendRootState *blk_get_root_state(BlockBackend *blk) { return &blk->root_state; diff --git a/block/commit.c b/block/commit.c index fdebe87c16..a5d02aa560 100644 --- a/block/commit.c +++ b/block/commit.c @@ -236,14 +236,14 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, orig_overlay_flags = bdrv_get_flags(overlay_bs); /* convert base & overlay_bs to r/w, if necessary */ - if (!(orig_base_flags & BDRV_O_RDWR)) { - reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, - orig_base_flags | BDRV_O_RDWR); - } if (!(orig_overlay_flags & BDRV_O_RDWR)) { reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL, orig_overlay_flags | BDRV_O_RDWR); } + if (!(orig_base_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, + orig_base_flags | BDRV_O_RDWR); + } if (reopen_queue) { bdrv_reopen_multiple(reopen_queue, &local_err); if (local_err != NULL) { diff --git a/block/gluster.c b/block/gluster.c index 1eb3a8c398..0857c14645 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -429,28 +429,23 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { int ret; - GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; off_t size = nb_sectors * BDRV_SECTOR_SIZE; off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb->size = size; - acb->ret = 0; - acb->coroutine = qemu_coroutine_self(); - acb->aio_context = bdrv_get_aio_context(bs); + acb.size = size; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb); + ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb); if (ret < 0) { - ret = -errno; - goto out; + return -errno; } qemu_coroutine_yield(); - ret = acb->ret; - -out: - g_slice_free(GlusterAIOCB, acb); - return ret; + return acb.ret; } static inline bool gluster_supports_zerofill(void) @@ -541,35 +536,30 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) { int ret; - GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; size_t size = nb_sectors * BDRV_SECTOR_SIZE; off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb->size = size; - acb->ret = 0; - acb->coroutine = qemu_coroutine_self(); - acb->aio_context = bdrv_get_aio_context(bs); + acb.size = size; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); if (write) { ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, - &gluster_finish_aiocb, acb); + gluster_finish_aiocb, &acb); } else { ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, - &gluster_finish_aiocb, acb); + gluster_finish_aiocb, &acb); } if (ret < 0) { - ret = -errno; - goto out; + return -errno; } qemu_coroutine_yield(); - ret = acb->ret; - -out: - g_slice_free(GlusterAIOCB, acb); - return ret; + return acb.ret; } static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) @@ -600,26 +590,21 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) { int ret; - GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; - acb->size = 0; - acb->ret = 0; - acb->coroutine = qemu_coroutine_self(); - acb->aio_context = bdrv_get_aio_context(bs); + acb.size = 0; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); + ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb); if (ret < 0) { - ret = -errno; - goto out; + return -errno; } qemu_coroutine_yield(); - ret = acb->ret; - -out: - g_slice_free(GlusterAIOCB, acb); - return ret; + return acb.ret; } #ifdef CONFIG_GLUSTERFS_DISCARD @@ -627,28 +612,23 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { int ret; - GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; size_t size = nb_sectors * BDRV_SECTOR_SIZE; off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb->size = 0; - acb->ret = 0; - acb->coroutine = qemu_coroutine_self(); - acb->aio_context = bdrv_get_aio_context(bs); + acb.size = 0; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb); + ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb); if (ret < 0) { - ret = -errno; - goto out; + return -errno; } qemu_coroutine_yield(); - ret = acb->ret; - -out: - g_slice_free(GlusterAIOCB, acb); - return ret; + return acb.ret; } #endif diff --git a/block/io.c b/block/io.c index 5ac6256ad3..e00fb5d690 100644 --- a/block/io.c +++ b/block/io.c @@ -216,6 +216,8 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) /* Check if any requests are in-flight (including throttled requests) */ bool bdrv_requests_pending(BlockDriverState *bs) { + BdrvChild *child; + if (!QLIST_EMPTY(&bs->tracked_requests)) { return true; } @@ -225,17 +227,31 @@ bool bdrv_requests_pending(BlockDriverState *bs) if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { return true; } - if (bs->file && bdrv_requests_pending(bs->file->bs)) { - return true; - } - if (bs->backing && bdrv_requests_pending(bs->backing->bs)) { - return true; + + QLIST_FOREACH(child, &bs->children, next) { + if (bdrv_requests_pending(child->bs)) { + return true; + } } + return false; } +static void bdrv_drain_recurse(BlockDriverState *bs) +{ + BdrvChild *child; + + if (bs->drv && bs->drv->bdrv_drain) { + bs->drv->bdrv_drain(bs); + } + QLIST_FOREACH(child, &bs->children, next) { + bdrv_drain_recurse(child->bs); + } +} + /* - * Wait for pending requests to complete on a single BlockDriverState subtree + * Wait for pending requests to complete on a single BlockDriverState subtree, + * and suspend block driver's internal I/O until next request arrives. * * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState * AioContext. @@ -248,6 +264,7 @@ void bdrv_drain(BlockDriverState *bs) { bool busy = true; + bdrv_drain_recurse(bs); while (busy) { /* Keep iterating */ bdrv_flush_io_queue(bs); @@ -345,13 +362,14 @@ static void tracked_request_end(BdrvTrackedRequest *req) static void tracked_request_begin(BdrvTrackedRequest *req, BlockDriverState *bs, int64_t offset, - unsigned int bytes, bool is_write) + unsigned int bytes, + enum BdrvTrackedRequestType type) { *req = (BdrvTrackedRequest){ .bs = bs, .offset = offset, .bytes = bytes, - .is_write = is_write, + .type = type, .co = qemu_coroutine_self(), .serialising = false, .overlap_offset = offset, @@ -845,7 +863,9 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, mark_request_serialising(req, bdrv_get_cluster_size(bs)); } - wait_serialising_requests(req); + if (!(flags & BDRV_REQ_NO_SERIALISING)) { + wait_serialising_requests(req); + } if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; @@ -934,7 +954,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, } /* Don't do copy-on-read if we read data before write operation */ - if (bs->copy_on_read && !(flags & BDRV_REQ_NO_COPY_ON_READ)) { + if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) { flags |= BDRV_REQ_COPY_ON_READ; } @@ -968,7 +988,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, bytes = ROUND_UP(bytes, align); } - tracked_request_begin(&req, bs, offset, bytes, false); + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); @@ -1003,13 +1023,13 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); } -int coroutine_fn bdrv_co_no_copy_on_readv(BlockDriverState *bs, +int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - trace_bdrv_co_no_copy_on_readv(bs, sector_num, nb_sectors); + trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors); return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, - BDRV_REQ_NO_COPY_ON_READ); + BDRV_REQ_NO_SERIALISING); } int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, @@ -1289,7 +1309,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, * Pad qiov with the read parts and be sure to have a tracked request not * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. */ - tracked_request_begin(&req, bs, offset, bytes, true); + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); if (!qiov) { ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); @@ -2314,18 +2334,20 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque) int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { int ret; + BdrvTrackedRequest req; if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || bdrv_is_sg(bs)) { return 0; } + tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH); /* Write back cached data to the OS even with cache=unsafe */ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); if (bs->drv->bdrv_co_flush_to_os) { ret = bs->drv->bdrv_co_flush_to_os(bs); if (ret < 0) { - return ret; + goto out; } } @@ -2365,14 +2387,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) ret = 0; } if (ret < 0) { - return ret; + goto out; } /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH * in the case of cache=unsafe, so there are no useless flushes. */ flush_parent: - return bs->file ? bdrv_co_flush(bs->file->bs) : 0; + ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0; +out: + tracked_request_end(&req); + return ret; } int bdrv_flush(BlockDriverState *bs) @@ -2415,6 +2440,7 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque) int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { + BdrvTrackedRequest req; int max_discard, ret; if (!bs->drv) { @@ -2437,6 +2463,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return 0; } + tracked_request_begin(&req, bs, sector_num, nb_sectors, + BDRV_TRACKED_DISCARD); bdrv_set_dirty(bs, sector_num, nb_sectors); max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); @@ -2470,20 +2498,24 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, bdrv_co_io_em_complete, &co); if (acb == NULL) { - return -EIO; + ret = -EIO; + goto out; } else { qemu_coroutine_yield(); ret = co.ret; } } if (ret && ret != -ENOTSUP) { - return ret; + goto out; } sector_num += num; nb_sectors -= num; } - return 0; + ret = 0; +out: + tracked_request_end(&req); + return ret; } int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) @@ -2512,26 +2544,109 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) return rwco.ret; } -/* needed for generic scsi interface */ +typedef struct { + CoroutineIOCompletion *co; + QEMUBH *bh; +} BdrvIoctlCompletionData; -int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +static void bdrv_ioctl_bh_cb(void *opaque) +{ + BdrvIoctlCompletionData *data = opaque; + + bdrv_co_io_em_complete(data->co, -ENOTSUP); + qemu_bh_delete(data->bh); +} + +static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) { BlockDriver *drv = bs->drv; + BdrvTrackedRequest tracked_req; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + BlockAIOCB *acb; - if (drv && drv->bdrv_ioctl) - return drv->bdrv_ioctl(bs, req, buf); - return -ENOTSUP; + tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL); + if (!drv || !drv->bdrv_aio_ioctl) { + co.ret = -ENOTSUP; + goto out; + } + + acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co); + if (!acb) { + BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1); + data->bh = aio_bh_new(bdrv_get_aio_context(bs), + bdrv_ioctl_bh_cb, data); + data->co = &co; + qemu_bh_schedule(data->bh); + } + qemu_coroutine_yield(); +out: + tracked_request_end(&tracked_req); + return co.ret; +} + +typedef struct { + BlockDriverState *bs; + int req; + void *buf; + int ret; +} BdrvIoctlCoData; + +static void coroutine_fn bdrv_co_ioctl_entry(void *opaque) +{ + BdrvIoctlCoData *data = opaque; + data->ret = bdrv_co_do_ioctl(data->bs, data->req, data->buf); +} + +/* needed for generic scsi interface */ +int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +{ + BdrvIoctlCoData data = { + .bs = bs, + .req = req, + .buf = buf, + .ret = -EINPROGRESS, + }; + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_co_ioctl_entry(&data); + } else { + Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry); + qemu_coroutine_enter(co, &data); + } + while (data.ret == -EINPROGRESS) { + aio_poll(bdrv_get_aio_context(bs), true); + } + return data.ret; +} + +static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque) +{ + BlockAIOCBCoroutine *acb = opaque; + acb->req.error = bdrv_co_do_ioctl(acb->common.bs, + acb->req.req, acb->req.buf); + bdrv_co_complete(acb); } BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { - BlockDriver *drv = bs->drv; + BlockAIOCBCoroutine *acb = qemu_aio_get(&bdrv_em_co_aiocb_info, + bs, cb, opaque); + Coroutine *co; + + acb->need_bh = true; + acb->req.error = -EINPROGRESS; + acb->req.req = req; + acb->req.buf = buf; + co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry); + qemu_coroutine_enter(co, acb); - if (drv && drv->bdrv_aio_ioctl) - return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); - return NULL; + bdrv_co_maybe_schedule_bh(acb); + return &acb->common; } void *qemu_blockalign(BlockDriverState *bs, size_t size) diff --git a/block/iscsi.c b/block/iscsi.c index 9a628b7c66..bd1f1bfcd1 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -84,6 +84,7 @@ typedef struct IscsiTask { IscsiLun *iscsilun; QEMUTimer retry_timer; bool force_next_flush; + int err_code; } IscsiTask; typedef struct IscsiAIOCB { @@ -96,6 +97,7 @@ typedef struct IscsiAIOCB { int status; int64_t sector_num; int nb_sectors; + int ret; #ifdef __linux__ sg_io_hdr_t *ioh; #endif @@ -169,19 +171,70 @@ static inline unsigned exp_random(double mean) return -mean * log((double)rand() / RAND_MAX); } -/* SCSI_STATUS_TASK_SET_FULL and SCSI_STATUS_TIMEOUT were introduced - * in libiscsi 1.10.0 as part of an enum. The LIBISCSI_API_VERSION - * macro was introduced in 1.11.0. So use the API_VERSION macro as - * a hint that the macros are defined and define them ourselves - * otherwise to keep the required libiscsi version at 1.9.0 */ -#if !defined(LIBISCSI_API_VERSION) -#define QEMU_SCSI_STATUS_TASK_SET_FULL 0x28 -#define QEMU_SCSI_STATUS_TIMEOUT 0x0f000002 -#else -#define QEMU_SCSI_STATUS_TASK_SET_FULL SCSI_STATUS_TASK_SET_FULL -#define QEMU_SCSI_STATUS_TIMEOUT SCSI_STATUS_TIMEOUT +/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in + * libiscsi 1.10.0, together with other constants we need. Use it as + * a hint that we have to define them ourselves if needed, to keep the + * minimum required libiscsi version at 1.9.0. We use an ASCQ macro for + * the test because SCSI_STATUS_* is an enum. + * + * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes + * an enum, check against the LIBISCSI_API_VERSION macro, which was + * introduced in 1.11.0. If it is present, there is no need to define + * anything. + */ +#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \ + !defined(LIBISCSI_API_VERSION) +#define SCSI_STATUS_TASK_SET_FULL 0x28 +#define SCSI_STATUS_TIMEOUT 0x0f000002 +#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST 0x2600 +#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR 0x1a00 #endif +static int iscsi_translate_sense(struct scsi_sense *sense) +{ + int ret; + + switch (sense->key) { + case SCSI_SENSE_NOT_READY: + return -EBUSY; + case SCSI_SENSE_DATA_PROTECTION: + return -EACCES; + case SCSI_SENSE_COMMAND_ABORTED: + return -ECANCELED; + case SCSI_SENSE_ILLEGAL_REQUEST: + /* Parse ASCQ */ + break; + default: + return -EIO; + } + switch (sense->ascq) { + case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR: + case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE: + case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB: + case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST: + ret = -EINVAL; + break; + case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE: + ret = -ENOSPC; + break; + case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED: + ret = -ENOTSUP; + break; + case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT: + case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED: + case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN: + ret = -ENOMEDIUM; + break; + case SCSI_SENSE_ASCQ_WRITE_PROTECTED: + ret = -EACCES; + break; + default: + ret = -EIO; + break; + } + return ret; +} + static void iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -203,11 +256,11 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, goto out; } if (status == SCSI_STATUS_BUSY || - status == QEMU_SCSI_STATUS_TIMEOUT || - status == QEMU_SCSI_STATUS_TASK_SET_FULL) { + status == SCSI_STATUS_TIMEOUT || + status == SCSI_STATUS_TASK_SET_FULL) { unsigned retry_time = exp_random(iscsi_retry_times[iTask->retries - 1]); - if (status == QEMU_SCSI_STATUS_TIMEOUT) { + if (status == SCSI_STATUS_TIMEOUT) { /* make sure the request is rescheduled AFTER the * reconnect is initiated */ retry_time = EVENT_INTERVAL * 2; @@ -226,6 +279,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, return; } } + iTask->err_code = iscsi_translate_sense(&task->sense); error_report("iSCSI Failure: %s", iscsi_get_error(iscsi)); } else { iTask->iscsilun->force_next_flush |= iTask->force_next_flush; @@ -455,7 +509,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors); @@ -644,7 +698,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } return 0; @@ -683,7 +737,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } return 0; @@ -703,7 +757,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, if (status < 0) { error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s", iscsi_get_error(iscsi)); - acb->status = -EIO; + acb->status = iscsi_translate_sense(&acb->task->sense); } acb->ioh->driver_status = 0; @@ -726,6 +780,38 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, iscsi_schedule_bh(acb); } +static void iscsi_ioctl_bh_completion(void *opaque) +{ + IscsiAIOCB *acb = opaque; + + qemu_bh_delete(acb->bh); + acb->common.cb(acb->common.opaque, acb->ret); + qemu_aio_unref(acb); +} + +static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf) +{ + BlockDriverState *bs = acb->common.bs; + IscsiLun *iscsilun = bs->opaque; + int ret = 0; + + switch (req) { + case SG_GET_VERSION_NUM: + *(int *)buf = 30000; + break; + case SG_GET_SCSI_ID: + ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type; + break; + default: + ret = -EINVAL; + } + assert(!acb->bh); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), + iscsi_ioctl_bh_completion, acb); + acb->ret = ret; + qemu_bh_schedule(acb->bh); +} + static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) @@ -735,8 +821,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, struct iscsi_data data; IscsiAIOCB *acb; - assert(req == SG_IO); - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); acb->iscsilun = iscsilun; @@ -745,6 +829,11 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, acb->buf = NULL; acb->ioh = buf; + if (req != SG_IO) { + iscsi_ioctl_handle_emulated(acb, req, buf); + return &acb->common; + } + acb->task = malloc(sizeof(struct scsi_task)); if (acb->task == NULL) { error_report("iSCSI: Failed to allocate task for scsi command. %s", @@ -809,38 +898,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, return &acb->common; } -static void ioctl_cb(void *opaque, int status) -{ - int *p_status = opaque; - *p_status = status; -} - -static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - IscsiLun *iscsilun = bs->opaque; - int status; - - switch (req) { - case SG_GET_VERSION_NUM: - *(int *)buf = 30000; - break; - case SG_GET_SCSI_ID: - ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type; - break; - case SG_IO: - status = -EINPROGRESS; - iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status); - - while (status == -EINPROGRESS) { - aio_poll(iscsilun->aio_context, true); - } - - return 0; - default: - return -1; - } - return 0; -} #endif static int64_t @@ -905,7 +962,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors); @@ -999,7 +1056,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } if (flags & BDRV_REQ_MAY_UNMAP) { @@ -1771,7 +1828,6 @@ static BlockDriver bdrv_iscsi = { .bdrv_co_flush_to_disk = iscsi_co_flush, #ifdef __linux__ - .bdrv_ioctl = iscsi_ioctl, .bdrv_aio_ioctl = iscsi_aio_ioctl, #endif diff --git a/block/mirror.c b/block/mirror.c index b1252a1b29..0e8f5565a5 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -384,9 +384,11 @@ static void mirror_exit(BlockJob *job, void *opaque) aio_context_release(replace_aio_context); } g_free(s->replaces); + bdrv_op_unblock_all(s->target, s->common.blocker); bdrv_unref(s->target); block_job_completed(&s->common, data->ret); g_free(data); + bdrv_drained_end(src); bdrv_unref(src); } @@ -606,6 +608,9 @@ immediate_exit: data = g_malloc(sizeof(*data)); data->ret = ret; + /* Before we switch to target in mirror_exit, make sure data doesn't + * change. */ + bdrv_drained_begin(s->common.bs); block_job_defer_to_main_loop(&s->common, mirror_exit, data); } @@ -741,9 +746,12 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); if (!s->dirty_bitmap) { g_free(s->replaces); - block_job_release(bs); + block_job_unref(&s->common); return; } + + bdrv_op_block_all(s->target, s->common.blocker); + bdrv_set_enable_write_cache(s->target, true); if (s->target->blk) { blk_set_on_error(s->target->blk, on_target_error, on_target_error); diff --git a/block/nbd.c b/block/nbd.c index c2a87e99bb..cd6a587776 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -206,24 +206,24 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export, saddr = g_new0(SocketAddress, 1); if (qdict_haskey(options, "path")) { - saddr->kind = SOCKET_ADDRESS_KIND_UNIX; - saddr->q_unix = g_new0(UnixSocketAddress, 1); - saddr->q_unix->path = g_strdup(qdict_get_str(options, "path")); + saddr->type = SOCKET_ADDRESS_KIND_UNIX; + saddr->u.q_unix = g_new0(UnixSocketAddress, 1); + saddr->u.q_unix->path = g_strdup(qdict_get_str(options, "path")); qdict_del(options, "path"); } else { - saddr->kind = SOCKET_ADDRESS_KIND_INET; - saddr->inet = g_new0(InetSocketAddress, 1); - saddr->inet->host = g_strdup(qdict_get_str(options, "host")); + saddr->type = SOCKET_ADDRESS_KIND_INET; + saddr->u.inet = g_new0(InetSocketAddress, 1); + saddr->u.inet->host = g_strdup(qdict_get_str(options, "host")); if (!qdict_get_try_str(options, "port")) { - saddr->inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT); + saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT); } else { - saddr->inet->port = g_strdup(qdict_get_str(options, "port")); + saddr->u.inet->port = g_strdup(qdict_get_str(options, "port")); } qdict_del(options, "host"); qdict_del(options, "port"); } - s->client.is_unix = saddr->kind == SOCKET_ADDRESS_KIND_UNIX; + s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; *export = g_strdup(qdict_get_try_str(options, "export")); if (*export) { diff --git a/block/parallels.c b/block/parallels.c index 4f79293826..f689fdeaff 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -220,7 +220,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier); s->data_end += s->tracks; bitmap_set(s->bat_dirty_bmap, - bat_entry_off(idx) / s->bat_dirty_block, 1); + bat_entry_off(idx + i) / s->bat_dirty_block, 1); } return bat2sect(s, idx) + sector_num % s->tracks; diff --git a/block/qapi.c b/block/qapi.c index ec0f5139e2..267f147fe3 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -64,7 +64,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) info->backing_file_depth = bdrv_get_backing_file_depth(bs); info->detect_zeroes = bs->detect_zeroes; - if (bs->io_limits_enabled) { + if (bs->throttle_state) { ThrottleConfig cfg; throttle_group_get_config(bs, &cfg); @@ -346,17 +346,68 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, s->stats = g_malloc0(sizeof(*s->stats)); if (bs->blk) { BlockAcctStats *stats = blk_get_stats(bs->blk); + BlockAcctTimedStats *ts = NULL; s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; + + s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; + s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; + s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; + + s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; + s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; + s->stats->invalid_flush_operations = + stats->invalid_ops[BLOCK_ACCT_FLUSH]; + s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ]; s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; + + s->stats->has_idle_time_ns = stats->last_access_time_ns > 0; + if (s->stats->has_idle_time_ns) { + s->stats->idle_time_ns = block_acct_idle_time_ns(stats); + } + + s->stats->account_invalid = stats->account_invalid; + s->stats->account_failed = stats->account_failed; + + while ((ts = block_acct_interval_next(stats, ts))) { + BlockDeviceTimedStatsList *timed_stats = + g_malloc0(sizeof(*timed_stats)); + BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); + timed_stats->next = s->stats->timed_stats; + timed_stats->value = dev_stats; + s->stats->timed_stats = timed_stats; + + TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; + TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; + TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; + + dev_stats->interval_length = ts->interval_length; + + dev_stats->min_rd_latency_ns = timed_average_min(rd); + dev_stats->max_rd_latency_ns = timed_average_max(rd); + dev_stats->avg_rd_latency_ns = timed_average_avg(rd); + + dev_stats->min_wr_latency_ns = timed_average_min(wr); + dev_stats->max_wr_latency_ns = timed_average_max(wr); + dev_stats->avg_wr_latency_ns = timed_average_avg(wr); + + dev_stats->min_flush_latency_ns = timed_average_min(fl); + dev_stats->max_flush_latency_ns = timed_average_max(fl); + dev_stats->avg_flush_latency_ns = timed_average_avg(fl); + + dev_stats->avg_rd_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_READ); + dev_stats->avg_wr_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); + } } s->stats->wr_highest_offset = bs->wr_highest_offset; @@ -385,7 +436,9 @@ BlockInfoList *qmp_query_block(Error **errp) bdrv_query_info(blk, &info->value, &local_err); if (local_err) { error_propagate(errp, local_err); - goto err; + g_free(info); + qapi_free_BlockInfoList(head); + return NULL; } *p_next = info; @@ -393,10 +446,6 @@ BlockInfoList *qmp_query_block(Error **errp) } return head; - - err: - qapi_free_BlockInfoList(head); - return NULL; } BlockStatsList *qmp_query_blockstats(bool has_query_nodes, diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 67be0ce2c9..24a60e2236 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -312,7 +312,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, if (!offset) return 0; - assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED); + assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL); for (i = 0; i < nb_clusters; i++) { uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; @@ -324,14 +324,16 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, return i; } -static int count_contiguous_free_clusters(int nb_clusters, uint64_t *l2_table) +static int count_contiguous_clusters_by_type(int nb_clusters, + uint64_t *l2_table, + int wanted_type) { int i; for (i = 0; i < nb_clusters; i++) { int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i])); - if (type != QCOW2_CLUSTER_UNALLOCATED) { + if (type != wanted_type) { break; } } @@ -554,13 +556,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, ret = -EIO; goto fail; } - c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_ZERO); + c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], + QCOW2_CLUSTER_ZERO); *cluster_offset = 0; break; case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? */ - c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]); + c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], + QCOW2_CLUSTER_UNALLOCATED); *cluster_offset = 0; break; case QCOW2_CLUSTER_NORMAL: diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 4b81c8db61..820f412ab6 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -560,13 +560,16 @@ static int alloc_refcount_block(BlockDriverState *bs, } /* Hook up the new refcount table in the qcow2 header */ - uint8_t data[12]; - cpu_to_be64w((uint64_t*)data, table_offset); - cpu_to_be32w((uint32_t*)(data + 8), table_clusters); + struct QEMU_PACKED { + uint64_t d64; + uint32_t d32; + } data; + cpu_to_be64w(&data.d64, table_offset); + cpu_to_be32w(&data.d32, table_clusters); BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE); ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, refcount_table_offset), - data, sizeof(data)); + &data, sizeof(data)); if (ret < 0) { goto fail_table; } @@ -1241,7 +1244,7 @@ fail: /* refcount checking functions */ -static size_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries) +static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries) { /* This assertion holds because there is no way we can address more than * 2^(64 - 9) clusters at once (with cluster size 512 = 2^9, and because diff --git a/block/qcow2.c b/block/qcow2.c index bacc4f2e11..88f56c8868 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2738,18 +2738,16 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); *spec_info = (ImageInfoSpecific){ - .kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2, - { - .qcow2 = g_new(ImageInfoSpecificQCow2, 1), - }, + .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, + .u.qcow2 = g_new(ImageInfoSpecificQCow2, 1), }; if (s->qcow_version == 2) { - *spec_info->qcow2 = (ImageInfoSpecificQCow2){ + *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){ .compat = g_strdup("0.10"), .refcount_bits = s->refcount_bits, }; } else if (s->qcow_version == 3) { - *spec_info->qcow2 = (ImageInfoSpecificQCow2){ + *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){ .compat = g_strdup("1.1"), .lazy_refcounts = s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS, diff --git a/block/qed.c b/block/qed.c index 5ea05d4909..9b88895038 100644 --- a/block/qed.c +++ b/block/qed.c @@ -375,6 +375,18 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, } } +static void bdrv_qed_drain(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + /* Cancel timer and start doing I/O that were meant to happen as if it + * fired, that way we get bdrv_drain() taking care of the ongoing requests + * correctly. */ + qed_cancel_need_check_timer(s); + qed_plug_allocating_write_reqs(s); + bdrv_aio_flush(s->bs, qed_clear_need_check, s); +} + static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -1676,6 +1688,7 @@ static BlockDriver bdrv_qed = { .bdrv_check = bdrv_qed_check, .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, + .bdrv_drain = bdrv_qed_drain, }; static void bdrv_qed_init(void) diff --git a/block/raw-posix.c b/block/raw-posix.c index 918c756c2e..d9162fd306 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1976,8 +1976,8 @@ BlockDriver bdrv_file = { #if defined(__APPLE__) && defined(__MACH__) static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ); -static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize ); - +static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, + CFIndex maxPathSize, int flags); kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ) { kern_return_t kernResult; @@ -2004,7 +2004,8 @@ kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ) return kernResult; } -kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize ) +kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, + CFIndex maxPathSize, int flags) { io_object_t nextMedia; kern_return_t kernResult = KERN_FAILURE; @@ -2017,7 +2018,9 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma if ( bsdPathAsCFString ) { size_t devPathLength; strcpy( bsdPath, _PATH_DEV ); - strcat( bsdPath, "r" ); + if (flags & BDRV_O_NOCACHE) { + strcat(bsdPath, "r"); + } devPathLength = strlen( bsdPath ); if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) { kernResult = KERN_SUCCESS; @@ -2129,8 +2132,8 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, int fd; kernResult = FindEjectableCDMedia( &mediaIterator ); - kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) ); - + kernResult = GetBSDPath(mediaIterator, bsdPath, sizeof(bsdPath), + flags); if ( bsdPath[ 0 ] != '\0' ) { strcat(bsdPath,"s0"); /* some CDs don't have a partition 0 */ @@ -2175,12 +2178,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, } #if defined(__linux__) -static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - BDRVRawState *s = bs->opaque; - - return ioctl(s->fd, req, buf); -} static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, @@ -2338,7 +2335,6 @@ static BlockDriver bdrv_host_device = { /* generic scsi device */ #ifdef __linux__ - .bdrv_ioctl = hdev_ioctl, .bdrv_aio_ioctl = hdev_aio_ioctl, #endif }; @@ -2471,7 +2467,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_lock_medium = cdrom_lock_medium, /* generic scsi device */ - .bdrv_ioctl = hdev_ioctl, .bdrv_aio_ioctl = hdev_aio_ioctl, }; #endif /* __linux__ */ diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 0aded31c22..915d6fd0e6 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -169,11 +169,6 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked) bdrv_lock_medium(bs->file->bs, locked); } -static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - return bdrv_ioctl(bs->file->bs, req, buf); -} - static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, @@ -262,7 +257,6 @@ BlockDriver bdrv_raw = { .bdrv_media_changed = &raw_media_changed, .bdrv_eject = &raw_eject, .bdrv_lock_medium = &raw_lock_medium, - .bdrv_ioctl = &raw_ioctl, .bdrv_aio_ioctl = &raw_aio_ioctl, .create_opts = &raw_create_opts, .bdrv_has_zero_init = &raw_has_zero_init diff --git a/block/snapshot.c b/block/snapshot.c index 89500f2f18..6e9fa8da98 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -253,9 +253,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs, return -ENOTSUP; } -void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, - const char *id_or_name, - Error **errp) +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp) { int ret; Error *local_err = NULL; @@ -270,6 +270,7 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, if (ret < 0) { error_propagate(errp, local_err); } + return ret; } int bdrv_snapshot_list(BlockDriverState *bs, @@ -356,3 +357,130 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, return ret; } + + +/* Group operations. All block drivers are involved. + * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers) */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) +{ + bool ok = true; + BlockDriverState *bs = NULL; + + while (ok && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) { + ok = bdrv_can_snapshot(bs); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return ok; +} + +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, + Error **err) +{ + int ret = 0; + BlockDriverState *bs = NULL; + QEMUSnapshotInfo sn1, *snapshot = &sn1; + + while (ret == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs) && + bdrv_snapshot_find(bs, snapshot, name) >= 0) { + ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return ret; +} + + +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) +{ + int err = 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + err = bdrv_snapshot_goto(bs, name); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) +{ + QEMUSnapshotInfo sn; + int err = 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + err = bdrv_snapshot_find(bs, &sn, name); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs) +{ + int err = 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bs == vm_state_bs) { + sn->vm_state_size = vm_state_size; + err = bdrv_snapshot_create(bs, sn); + } else if (bdrv_can_snapshot(bs)) { + sn->vm_state_size = 0; + err = bdrv_snapshot_create(bs, sn); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +BlockDriverState *bdrv_all_find_vmstate_bs(void) +{ + bool not_found = true; + BlockDriverState *bs = NULL; + + while (not_found && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + not_found = !bdrv_can_snapshot(bs); + aio_context_release(ctx); + } + return bs; +} diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 3419af7d96..13b5baa5d7 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -437,6 +437,9 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname) * list, destroying the timers and setting the throttle_state pointer * to NULL. * + * The BlockDriverState must not have pending throttled requests, so + * the caller has to drain them first. + * * The group will be destroyed if it's empty after this operation. * * @bs: the BlockDriverState to remove @@ -446,6 +449,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs) ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); int i; + assert(bs->pending_reqs[0] == 0 && bs->pending_reqs[1] == 0); + assert(qemu_co_queue_empty(&bs->throttled_reqs[0])); + assert(qemu_co_queue_empty(&bs->throttled_reqs[1])); + qemu_mutex_lock(&tg->lock); for (i = 0; i < 2; i++) { if (tg->tokens[i] == bs) { diff --git a/block/vmdk.c b/block/vmdk.c index 0effb7d91c..6f819e413f 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2161,19 +2161,19 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs) ImageInfoList **next; *spec_info = (ImageInfoSpecific){ - .kind = IMAGE_INFO_SPECIFIC_KIND_VMDK, + .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, { .vmdk = g_new0(ImageInfoSpecificVmdk, 1), }, }; - *spec_info->vmdk = (ImageInfoSpecificVmdk) { + *spec_info->u.vmdk = (ImageInfoSpecificVmdk) { .create_type = g_strdup(s->create_type), .cid = s->cid, .parent_cid = s->parent_cid, }; - next = &spec_info->vmdk->extents; + next = &spec_info->u.vmdk->extents; for (i = 0; i < s->num_extents; i++) { *next = g_new0(ImageInfoList, 1); (*next)->value = vmdk_get_extent_info(&s->extents[i]); diff --git a/blockdev.c b/blockdev.c index 18712d25cc..313841b0b4 100644 --- a/blockdev.c +++ b/blockdev.c @@ -283,32 +283,6 @@ typedef struct { BlockDriverState *bs; } BDRVPutRefBH; -static void bdrv_put_ref_bh(void *opaque) -{ - BDRVPutRefBH *s = opaque; - - bdrv_unref(s->bs); - qemu_bh_delete(s->bh); - g_free(s); -} - -/* - * Release a BDS reference in a BH - * - * It is not safe to use bdrv_unref() from a callback function when the callers - * still need the BlockDriverState. In such cases we schedule a BH to release - * the reference. - */ -static void bdrv_put_ref_bh_schedule(BlockDriverState *bs) -{ - BDRVPutRefBH *s; - - s = g_new(BDRVPutRefBH, 1); - s->bh = qemu_bh_new(bdrv_put_ref_bh, s); - s->bs = bs; - qemu_bh_schedule(s->bh); -} - static int parse_block_error_action(const char *buf, bool is_read, Error **errp) { if (!strcmp(buf, "ignore")) { @@ -326,6 +300,45 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) } } +static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals, + Error **errp) +{ + const QListEntry *entry; + for (entry = qlist_first(intervals); entry; entry = qlist_next(entry)) { + switch (qobject_type(entry->value)) { + + case QTYPE_QSTRING: { + unsigned long long length; + const char *str = qstring_get_str(qobject_to_qstring(entry->value)); + if (parse_uint_full(str, &length, 10) == 0 && + length > 0 && length <= UINT_MAX) { + block_acct_add_interval(stats, (unsigned) length); + } else { + error_setg(errp, "Invalid interval length: %s", str); + return false; + } + break; + } + + case QTYPE_QINT: { + int64_t length = qint_get_int(qobject_to_qint(entry->value)); + if (length > 0 && length <= UINT_MAX) { + block_acct_add_interval(stats, (unsigned) length); + } else { + error_setg(errp, "Invalid interval length: %" PRId64, length); + return false; + } + break; + } + + default: + error_setg(errp, "The specification of stats-intervals is invalid"); + return false; + } + } + return true; +} + static bool check_throttle_config(ThrottleConfig *cfg, Error **errp) { if (throttle_conflicting(cfg)) { @@ -467,12 +480,15 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, const char *buf; int bdrv_flags = 0; int on_read_error, on_write_error; + bool account_invalid, account_failed; BlockBackend *blk; BlockDriverState *bs; ThrottleConfig cfg; int snapshot = 0; Error *error = NULL; QemuOpts *opts; + QDict *interval_dict = NULL; + QList *interval_list = NULL; const char *id; bool has_driver_specific_opts; BlockdevDetectZeroesOptions detect_zeroes = @@ -503,6 +519,18 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, /* extract parameters */ snapshot = qemu_opt_get_bool(opts, "snapshot", 0); + account_invalid = qemu_opt_get_bool(opts, "stats-account-invalid", true); + account_failed = qemu_opt_get_bool(opts, "stats-account-failed", true); + + qdict_extract_subqdict(bs_opts, &interval_dict, "stats-intervals."); + qdict_array_split(interval_dict, &interval_list); + + if (qdict_size(interval_dict) != 0) { + error_setg(errp, "Invalid option stats-intervals.%s", + qdict_first(interval_dict)->key); + goto early_err; + } + extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg, &detect_zeroes, &error); if (error) { @@ -599,16 +627,28 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if (bdrv_key_required(bs)) { autostart = 0; } + + block_acct_init(blk_get_stats(blk), account_invalid, account_failed); + + if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) { + blk_unref(blk); + blk = NULL; + goto err_no_bs_opts; + } } blk_set_on_error(blk, on_read_error, on_write_error); err_no_bs_opts: qemu_opts_del(opts); + QDECREF(interval_dict); + QDECREF(interval_list); return blk; early_err: qemu_opts_del(opts); + QDECREF(interval_dict); + QDECREF(interval_list); err_no_opts: QDECREF(bs_opts); return NULL; @@ -1120,6 +1160,9 @@ void hmp_commit(Monitor *mon, const QDict *qdict) if (!strcmp(device, "all")) { ret = bdrv_commit_all(); } else { + BlockDriverState *bs; + AioContext *aio_context; + blk = blk_by_name(device); if (!blk) { monitor_printf(mon, "Device '%s' not found\n", device); @@ -1129,7 +1172,14 @@ void hmp_commit(Monitor *mon, const QDict *qdict) monitor_printf(mon, "Device '%s' has no medium\n", device); return; } - ret = bdrv_commit(blk_bs(blk)); + + bs = blk_bs(blk); + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + ret = bdrv_commit(bs); + + aio_context_release(aio_context); } if (ret < 0) { monitor_printf(mon, "'commit' error for '%s': %s\n", device, @@ -1137,16 +1187,17 @@ void hmp_commit(Monitor *mon, const QDict *qdict) } } -static void blockdev_do_action(int kind, void *data, Error **errp) +static void blockdev_do_action(TransactionActionKind type, void *data, + Error **errp) { TransactionAction action; TransactionActionList list; - action.kind = kind; - action.data = data; + action.type = type; + action.u.data = data; list.value = &action; list.next = NULL; - qmp_transaction(&list, errp); + qmp_transaction(&list, false, NULL, errp); } void qmp_blockdev_snapshot_sync(bool has_device, const char *device, @@ -1157,7 +1208,7 @@ void qmp_blockdev_snapshot_sync(bool has_device, const char *device, bool has_format, const char *format, bool has_mode, NewImageMode mode, Error **errp) { - BlockdevSnapshot snapshot = { + BlockdevSnapshotSync snapshot = { .has_device = has_device, .device = (char *) device, .has_node_name = has_node_name, @@ -1174,6 +1225,18 @@ void qmp_blockdev_snapshot_sync(bool has_device, const char *device, &snapshot, errp); } +void qmp_blockdev_snapshot(const char *node, const char *overlay, + Error **errp) +{ + BlockdevSnapshot snapshot_data = { + .node = (char *) node, + .overlay = (char *) overlay + }; + + blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT, + &snapshot_data, errp); +} + void qmp_blockdev_snapshot_internal_sync(const char *device, const char *name, Error **errp) @@ -1336,44 +1399,75 @@ static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, /* New and old BlockDriverState structs for atomic group operations */ -typedef struct BlkTransactionState BlkTransactionState; +typedef struct BlkActionState BlkActionState; -/* Only prepare() may fail. In a single transaction, only one of commit() or - abort() will be called, clean() will always be called if it present. */ -typedef struct BdrvActionOps { - /* Size of state struct, in bytes. */ +/** + * BlkActionOps: + * Table of operations that define an Action. + * + * @instance_size: Size of state struct, in bytes. + * @prepare: Prepare the work, must NOT be NULL. + * @commit: Commit the changes, can be NULL. + * @abort: Abort the changes on fail, can be NULL. + * @clean: Clean up resources after all transaction actions have called + * commit() or abort(). Can be NULL. + * + * Only prepare() may fail. In a single transaction, only one of commit() or + * abort() will be called. clean() will always be called if it is present. + */ +typedef struct BlkActionOps { size_t instance_size; - /* Prepare the work, must NOT be NULL. */ - void (*prepare)(BlkTransactionState *common, Error **errp); - /* Commit the changes, can be NULL. */ - void (*commit)(BlkTransactionState *common); - /* Abort the changes on fail, can be NULL. */ - void (*abort)(BlkTransactionState *common); - /* Clean up resource in the end, can be NULL. */ - void (*clean)(BlkTransactionState *common); -} BdrvActionOps; + void (*prepare)(BlkActionState *common, Error **errp); + void (*commit)(BlkActionState *common); + void (*abort)(BlkActionState *common); + void (*clean)(BlkActionState *common); +} BlkActionOps; -/* - * This structure must be arranged as first member in child type, assuming - * that compiler will also arrange it to the same address with parent instance. - * Later it will be used in free(). +/** + * BlkActionState: + * Describes one Action's state within a Transaction. + * + * @action: QAPI-defined enum identifying which Action to perform. + * @ops: Table of ActionOps this Action can perform. + * @block_job_txn: Transaction which this action belongs to. + * @entry: List membership for all Actions in this Transaction. + * + * This structure must be arranged as first member in a subclassed type, + * assuming that the compiler will also arrange it to the same offsets as the + * base class. */ -struct BlkTransactionState { +struct BlkActionState { TransactionAction *action; - const BdrvActionOps *ops; - QSIMPLEQ_ENTRY(BlkTransactionState) entry; + const BlkActionOps *ops; + BlockJobTxn *block_job_txn; + TransactionProperties *txn_props; + QSIMPLEQ_ENTRY(BlkActionState) entry; }; /* internal snapshot private data */ typedef struct InternalSnapshotState { - BlkTransactionState common; + BlkActionState common; BlockDriverState *bs; AioContext *aio_context; QEMUSnapshotInfo sn; bool created; } InternalSnapshotState; -static void internal_snapshot_prepare(BlkTransactionState *common, + +static int action_check_completion_mode(BlkActionState *s, Error **errp) +{ + if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + error_setg(errp, + "Action '%s' does not support Transaction property " + "completion-mode = %s", + TransactionActionKind_lookup[s->action->type], + ActionCompletionMode_lookup[s->txn_props->completion_mode]); + return -1; + } + return 0; +} + +static void internal_snapshot_prepare(BlkActionState *common, Error **errp) { Error *local_err = NULL; @@ -1388,9 +1482,9 @@ static void internal_snapshot_prepare(BlkTransactionState *common, InternalSnapshotState *state; int ret1; - g_assert(common->action->kind == + g_assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC); - internal = common->action->blockdev_snapshot_internal_sync; + internal = common->action->u.blockdev_snapshot_internal_sync; state = DO_UPCAST(InternalSnapshotState, common, common); /* 1. parse input */ @@ -1398,6 +1492,10 @@ static void internal_snapshot_prepare(BlkTransactionState *common, name = internal->name; /* 2. check for validation */ + if (action_check_completion_mode(common, errp) < 0) { + return; + } + blk = blk_by_name(device); if (!blk) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, @@ -1472,7 +1570,7 @@ static void internal_snapshot_prepare(BlkTransactionState *common, state->created = true; } -static void internal_snapshot_abort(BlkTransactionState *common) +static void internal_snapshot_abort(BlkActionState *common) { InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState, common, common); @@ -1495,7 +1593,7 @@ static void internal_snapshot_abort(BlkTransactionState *common) } } -static void internal_snapshot_clean(BlkTransactionState *common) +static void internal_snapshot_clean(BlkActionState *common) { InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState, common, common); @@ -1510,66 +1608,61 @@ static void internal_snapshot_clean(BlkTransactionState *common) /* external snapshot private data */ typedef struct ExternalSnapshotState { - BlkTransactionState common; + BlkActionState common; BlockDriverState *old_bs; BlockDriverState *new_bs; AioContext *aio_context; } ExternalSnapshotState; -static void external_snapshot_prepare(BlkTransactionState *common, +static void external_snapshot_prepare(BlkActionState *common, Error **errp) { - int flags, ret; - QDict *options; + int flags = 0, ret; + QDict *options = NULL; Error *local_err = NULL; - bool has_device = false; + /* Device and node name of the image to generate the snapshot from */ const char *device; - bool has_node_name = false; const char *node_name; - bool has_snapshot_node_name = false; - const char *snapshot_node_name; + /* Reference to the new image (for 'blockdev-snapshot') */ + const char *snapshot_ref; + /* File name of the new image (for 'blockdev-snapshot-sync') */ const char *new_image_file; - const char *format = "qcow2"; - enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); TransactionAction *action = common->action; - /* get parameters */ - g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC); - - has_device = action->blockdev_snapshot_sync->has_device; - device = action->blockdev_snapshot_sync->device; - has_node_name = action->blockdev_snapshot_sync->has_node_name; - node_name = action->blockdev_snapshot_sync->node_name; - has_snapshot_node_name = - action->blockdev_snapshot_sync->has_snapshot_node_name; - snapshot_node_name = action->blockdev_snapshot_sync->snapshot_node_name; - - new_image_file = action->blockdev_snapshot_sync->snapshot_file; - if (action->blockdev_snapshot_sync->has_format) { - format = action->blockdev_snapshot_sync->format; - } - if (action->blockdev_snapshot_sync->has_mode) { - mode = action->blockdev_snapshot_sync->mode; + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar + * purpose but a different set of parameters */ + switch (action->type) { + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT: + { + BlockdevSnapshot *s = action->u.blockdev_snapshot; + device = s->node; + node_name = s->node; + new_image_file = NULL; + snapshot_ref = s->overlay; + } + break; + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: + { + BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync; + device = s->has_device ? s->device : NULL; + node_name = s->has_node_name ? s->node_name : NULL; + new_image_file = s->snapshot_file; + snapshot_ref = NULL; + } + break; + default: + g_assert_not_reached(); } /* start processing */ - state->old_bs = bdrv_lookup_bs(has_device ? device : NULL, - has_node_name ? node_name : NULL, - &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - if (has_node_name && !has_snapshot_node_name) { - error_setg(errp, "New snapshot node name missing"); + if (action_check_completion_mode(common, errp) < 0) { return; } - if (has_snapshot_node_name && bdrv_find_node(snapshot_node_name)) { - error_setg(errp, "New snapshot node name already existing"); + state->old_bs = bdrv_lookup_bs(device, node_name, errp); + if (!state->old_bs) { return; } @@ -1600,39 +1693,79 @@ static void external_snapshot_prepare(BlkTransactionState *common, return; } - flags = state->old_bs->open_flags; + if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) { + BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync; + const char *format = s->has_format ? s->format : "qcow2"; + enum NewImageMode mode; + const char *snapshot_node_name = + s->has_snapshot_node_name ? s->snapshot_node_name : NULL; - /* create new image w/backing file */ - if (mode != NEW_IMAGE_MODE_EXISTING) { - bdrv_img_create(new_image_file, format, - state->old_bs->filename, - state->old_bs->drv->format_name, - NULL, -1, flags, &local_err, false); - if (local_err) { - error_propagate(errp, local_err); + if (node_name && !snapshot_node_name) { + error_setg(errp, "New snapshot node name missing"); return; } - } - options = qdict_new(); - if (has_snapshot_node_name) { - qdict_put(options, "node-name", - qstring_from_str(snapshot_node_name)); + if (snapshot_node_name && + bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) { + error_setg(errp, "New snapshot node name already in use"); + return; + } + + flags = state->old_bs->open_flags; + + /* create new image w/backing file */ + mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS; + if (mode != NEW_IMAGE_MODE_EXISTING) { + bdrv_img_create(new_image_file, format, + state->old_bs->filename, + state->old_bs->drv->format_name, + NULL, -1, flags, &local_err, false); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + + options = qdict_new(); + if (s->has_snapshot_node_name) { + qdict_put(options, "node-name", + qstring_from_str(snapshot_node_name)); + } + qdict_put(options, "driver", qstring_from_str(format)); + + flags |= BDRV_O_NO_BACKING; } - qdict_put(options, "driver", qstring_from_str(format)); - /* TODO Inherit bs->options or only take explicit options with an - * extended QMP command? */ assert(state->new_bs == NULL); - ret = bdrv_open(&state->new_bs, new_image_file, NULL, options, - flags | BDRV_O_NO_BACKING, &local_err); + ret = bdrv_open(&state->new_bs, new_image_file, snapshot_ref, options, + flags, errp); /* We will manually add the backing_hd field to the bs later */ if (ret != 0) { - error_propagate(errp, local_err); + return; + } + + if (state->new_bs->blk != NULL) { + error_setg(errp, "The snapshot is already in use by %s", + blk_name(state->new_bs->blk)); + return; + } + + if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, + errp)) { + return; + } + + if (state->new_bs->backing != NULL) { + error_setg(errp, "The snapshot already has a backing image"); + return; + } + + if (!state->new_bs->drv->supports_backing) { + error_setg(errp, "The snapshot does not support backing images"); } } -static void external_snapshot_commit(BlkTransactionState *common) +static void external_snapshot_commit(BlkActionState *common) { ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); @@ -1648,7 +1781,7 @@ static void external_snapshot_commit(BlkTransactionState *common) NULL); } -static void external_snapshot_abort(BlkTransactionState *common) +static void external_snapshot_abort(BlkActionState *common) { ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); @@ -1657,7 +1790,7 @@ static void external_snapshot_abort(BlkTransactionState *common) } } -static void external_snapshot_clean(BlkTransactionState *common) +static void external_snapshot_clean(BlkActionState *common) { ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); @@ -1668,21 +1801,33 @@ static void external_snapshot_clean(BlkTransactionState *common) } typedef struct DriveBackupState { - BlkTransactionState common; + BlkActionState common; BlockDriverState *bs; AioContext *aio_context; BlockJob *job; } DriveBackupState; -static void drive_backup_prepare(BlkTransactionState *common, Error **errp) +static void do_drive_backup(const char *device, const char *target, + bool has_format, const char *format, + enum MirrorSyncMode sync, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, + bool has_bitmap, const char *bitmap, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + BlockJobTxn *txn, Error **errp); + +static void drive_backup_prepare(BlkActionState *common, Error **errp) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); BlockBackend *blk; DriveBackup *backup; Error *local_err = NULL; - assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->drive_backup; + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup; blk = blk_by_name(backup->device); if (!blk) { @@ -1702,15 +1847,15 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp) bdrv_drained_begin(blk_bs(blk)); state->bs = blk_bs(blk); - qmp_drive_backup(backup->device, backup->target, - backup->has_format, backup->format, - backup->sync, - backup->has_mode, backup->mode, - backup->has_speed, backup->speed, - backup->has_bitmap, backup->bitmap, - backup->has_on_source_error, backup->on_source_error, - backup->has_on_target_error, backup->on_target_error, - &local_err); + do_drive_backup(backup->device, backup->target, + backup->has_format, backup->format, + backup->sync, + backup->has_mode, backup->mode, + backup->has_speed, backup->speed, + backup->has_bitmap, backup->bitmap, + backup->has_on_source_error, backup->on_source_error, + backup->has_on_target_error, backup->on_target_error, + common->block_job_txn, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1719,7 +1864,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp) state->job = state->bs->job; } -static void drive_backup_abort(BlkTransactionState *common) +static void drive_backup_abort(BlkActionState *common) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); BlockDriverState *bs = state->bs; @@ -1730,7 +1875,7 @@ static void drive_backup_abort(BlkTransactionState *common) } } -static void drive_backup_clean(BlkTransactionState *common) +static void drive_backup_clean(BlkActionState *common) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); @@ -1741,21 +1886,30 @@ static void drive_backup_clean(BlkTransactionState *common) } typedef struct BlockdevBackupState { - BlkTransactionState common; + BlkActionState common; BlockDriverState *bs; BlockJob *job; AioContext *aio_context; } BlockdevBackupState; -static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp) +static void do_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + BlockJobTxn *txn, Error **errp); + +static void blockdev_backup_prepare(BlkActionState *common, Error **errp) { BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); BlockdevBackup *backup; BlockBackend *blk, *target; Error *local_err = NULL; - assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->blockdev_backup; + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup; blk = blk_by_name(backup->device); if (!blk) { @@ -1785,12 +1939,12 @@ static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp) state->bs = blk_bs(blk); bdrv_drained_begin(state->bs); - qmp_blockdev_backup(backup->device, backup->target, - backup->sync, - backup->has_speed, backup->speed, - backup->has_on_source_error, backup->on_source_error, - backup->has_on_target_error, backup->on_target_error, - &local_err); + do_blockdev_backup(backup->device, backup->target, + backup->sync, + backup->has_speed, backup->speed, + backup->has_on_source_error, backup->on_source_error, + backup->has_on_target_error, backup->on_target_error, + common->block_job_txn, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1799,7 +1953,7 @@ static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp) state->job = state->bs->job; } -static void blockdev_backup_abort(BlkTransactionState *common) +static void blockdev_backup_abort(BlkActionState *common) { BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); BlockDriverState *bs = state->bs; @@ -1810,7 +1964,7 @@ static void blockdev_backup_abort(BlkTransactionState *common) } } -static void blockdev_backup_clean(BlkTransactionState *common) +static void blockdev_backup_clean(BlkActionState *common) { BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); @@ -1820,17 +1974,132 @@ static void blockdev_backup_clean(BlkTransactionState *common) } } -static void abort_prepare(BlkTransactionState *common, Error **errp) +typedef struct BlockDirtyBitmapState { + BlkActionState common; + BdrvDirtyBitmap *bitmap; + BlockDriverState *bs; + AioContext *aio_context; + HBitmap *backup; + bool prepared; +} BlockDirtyBitmapState; + +static void block_dirty_bitmap_add_prepare(BlkActionState *common, + Error **errp) +{ + Error *local_err = NULL; + BlockDirtyBitmapAdd *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + action = common->action->u.block_dirty_bitmap_add; + /* AIO context taken and released within qmp_block_dirty_bitmap_add */ + qmp_block_dirty_bitmap_add(action->node, action->name, + action->has_granularity, action->granularity, + &local_err); + + if (!local_err) { + state->prepared = true; + } else { + error_propagate(errp, local_err); + } +} + +static void block_dirty_bitmap_add_abort(BlkActionState *common) +{ + BlockDirtyBitmapAdd *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + action = common->action->u.block_dirty_bitmap_add; + /* Should not be able to fail: IF the bitmap was added via .prepare(), + * then the node reference and bitmap name must have been valid. + */ + if (state->prepared) { + qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort); + } +} + +static void block_dirty_bitmap_clear_prepare(BlkActionState *common, + Error **errp) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + BlockDirtyBitmap *action; + + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + action = common->action->u.block_dirty_bitmap_clear; + state->bitmap = block_dirty_bitmap_lookup(action->node, + action->name, + &state->bs, + &state->aio_context, + errp); + if (!state->bitmap) { + return; + } + + if (bdrv_dirty_bitmap_frozen(state->bitmap)) { + error_setg(errp, "Cannot modify a frozen bitmap"); + return; + } else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) { + error_setg(errp, "Cannot clear a disabled bitmap"); + return; + } + + bdrv_clear_dirty_bitmap(state->bitmap, &state->backup); + /* AioContext is released in .clean() */ +} + +static void block_dirty_bitmap_clear_abort(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + bdrv_undo_clear_dirty_bitmap(state->bitmap, state->backup); +} + +static void block_dirty_bitmap_clear_commit(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + hbitmap_free(state->backup); +} + +static void block_dirty_bitmap_clear_clean(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (state->aio_context) { + aio_context_release(state->aio_context); + } +} + +static void abort_prepare(BlkActionState *common, Error **errp) { error_setg(errp, "Transaction aborted using Abort action"); } -static void abort_commit(BlkTransactionState *common) +static void abort_commit(BlkActionState *common) { g_assert_not_reached(); /* this action never succeeds */ } -static const BdrvActionOps actions[] = { +static const BlkActionOps actions[] = { + [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = { + .instance_size = sizeof(ExternalSnapshotState), + .prepare = external_snapshot_prepare, + .commit = external_snapshot_commit, + .abort = external_snapshot_abort, + .clean = external_snapshot_clean, + }, [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { .instance_size = sizeof(ExternalSnapshotState), .prepare = external_snapshot_prepare, @@ -1851,7 +2120,7 @@ static const BdrvActionOps actions[] = { .clean = blockdev_backup_clean, }, [TRANSACTION_ACTION_KIND_ABORT] = { - .instance_size = sizeof(BlkTransactionState), + .instance_size = sizeof(BlkActionState), .prepare = abort_prepare, .commit = abort_commit, }, @@ -1861,40 +2130,85 @@ static const BdrvActionOps actions[] = { .abort = internal_snapshot_abort, .clean = internal_snapshot_clean, }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_add_prepare, + .abort = block_dirty_bitmap_add_abort, + }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_clear_prepare, + .commit = block_dirty_bitmap_clear_commit, + .abort = block_dirty_bitmap_clear_abort, + .clean = block_dirty_bitmap_clear_clean, + } }; +/** + * Allocate a TransactionProperties structure if necessary, and fill + * that structure with desired defaults if they are unset. + */ +static TransactionProperties *get_transaction_properties( + TransactionProperties *props) +{ + if (!props) { + props = g_new0(TransactionProperties, 1); + } + + if (!props->has_completion_mode) { + props->has_completion_mode = true; + props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL; + } + + return props; +} + /* * 'Atomic' group operations. The operations are performed as a set, and if * any fail then we roll back all operations in the group. */ -void qmp_transaction(TransactionActionList *dev_list, Error **errp) +void qmp_transaction(TransactionActionList *dev_list, + bool has_props, + struct TransactionProperties *props, + Error **errp) { TransactionActionList *dev_entry = dev_list; - BlkTransactionState *state, *next; + BlockJobTxn *block_job_txn = NULL; + BlkActionState *state, *next; Error *local_err = NULL; - QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionState) snap_bdrv_states; + QSIMPLEQ_HEAD(snap_bdrv_states, BlkActionState) snap_bdrv_states; QSIMPLEQ_INIT(&snap_bdrv_states); + /* Does this transaction get canceled as a group on failure? + * If not, we don't really need to make a BlockJobTxn. + */ + props = get_transaction_properties(props); + if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + block_job_txn = block_job_txn_new(); + } + /* drain all i/o before any operations */ bdrv_drain_all(); /* We don't do anything in this loop that commits us to the operations */ while (NULL != dev_entry) { TransactionAction *dev_info = NULL; - const BdrvActionOps *ops; + const BlkActionOps *ops; dev_info = dev_entry->value; dev_entry = dev_entry->next; - assert(dev_info->kind < ARRAY_SIZE(actions)); + assert(dev_info->type < ARRAY_SIZE(actions)); - ops = &actions[dev_info->kind]; + ops = &actions[dev_info->type]; assert(ops->instance_size > 0); state = g_malloc0(ops->instance_size); state->ops = ops; state->action = dev_info; + state->block_job_txn = block_job_txn; + state->txn_props = props; QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, state, entry); state->ops->prepare(state, &local_err); @@ -1927,47 +2241,86 @@ exit: } g_free(state); } + if (!has_props) { + qapi_free_TransactionProperties(props); + } + block_job_txn_unref(block_job_txn); } +void qmp_eject(const char *device, bool has_force, bool force, Error **errp) +{ + Error *local_err = NULL; + + qmp_blockdev_open_tray(device, has_force, force, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + qmp_blockdev_remove_medium(device, errp); +} -static void eject_device(BlockBackend *blk, int force, Error **errp) +void qmp_block_passwd(bool has_device, const char *device, + bool has_node_name, const char *node_name, + const char *password, Error **errp) { - BlockDriverState *bs = blk_bs(blk); + Error *local_err = NULL; + BlockDriverState *bs; AioContext *aio_context; - if (!bs) { - /* No medium inserted, so there is nothing to do */ + bs = bdrv_lookup_bs(has_device ? device : NULL, + has_node_name ? node_name : NULL, + &local_err); + if (local_err) { + error_propagate(errp, local_err); return; } aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) { - goto out; + bdrv_add_key(bs, password, errp); + + aio_context_release(aio_context); +} + +void qmp_blockdev_open_tray(const char *device, bool has_force, bool force, + Error **errp) +{ + BlockBackend *blk; + bool locked; + + if (!has_force) { + force = false; } + + blk = blk_by_name(device); + if (!blk) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", device); + return; + } + if (!blk_dev_has_removable_media(blk)) { - error_setg(errp, "Device '%s' is not removable", - bdrv_get_device_name(bs)); - goto out; + error_setg(errp, "Device '%s' is not removable", device); + return; } - if (blk_dev_is_medium_locked(blk) && !blk_dev_is_tray_open(blk)) { - blk_dev_eject_request(blk, force); - if (!force) { - error_setg(errp, "Device '%s' is locked", - bdrv_get_device_name(bs)); - goto out; - } + if (blk_dev_is_tray_open(blk)) { + return; } - bdrv_close(bs); + locked = blk_dev_is_medium_locked(blk); + if (locked) { + blk_dev_eject_request(blk, force); + } -out: - aio_context_release(aio_context); + if (!locked || force) { + blk_dev_change_media_cb(blk, false); + } } -void qmp_eject(const char *device, bool has_force, bool force, Error **errp) +void qmp_blockdev_close_tray(const char *device, Error **errp) { BlockBackend *blk; @@ -1978,103 +2331,214 @@ void qmp_eject(const char *device, bool has_force, bool force, Error **errp) return; } - eject_device(blk, force, errp); + if (!blk_dev_has_removable_media(blk)) { + error_setg(errp, "Device '%s' is not removable", device); + return; + } + + if (!blk_dev_is_tray_open(blk)) { + return; + } + + blk_dev_change_media_cb(blk, true); } -void qmp_block_passwd(bool has_device, const char *device, - bool has_node_name, const char *node_name, - const char *password, Error **errp) +void qmp_blockdev_remove_medium(const char *device, Error **errp) { - Error *local_err = NULL; + BlockBackend *blk; BlockDriverState *bs; AioContext *aio_context; + bool has_device; - bs = bdrv_lookup_bs(has_device ? device : NULL, - has_node_name ? node_name : NULL, - &local_err); - if (local_err) { - error_propagate(errp, local_err); + blk = blk_by_name(device); + if (!blk) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", device); + return; + } + + /* For BBs without a device, we can exchange the BDS tree at will */ + has_device = blk_get_attached_dev(blk); + + if (has_device && !blk_dev_has_removable_media(blk)) { + error_setg(errp, "Device '%s' is not removable", device); + return; + } + + if (has_device && !blk_dev_is_tray_open(blk)) { + error_setg(errp, "Tray of device '%s' is not open", device); + return; + } + + bs = blk_bs(blk); + if (!bs) { return; } aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_add_key(bs, password, errp); + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) { + goto out; + } + /* This follows the convention established by bdrv_make_anon() */ + if (bs->device_list.tqe_prev) { + QTAILQ_REMOVE(&bdrv_states, bs, device_list); + bs->device_list.tqe_prev = NULL; + } + + blk_remove_bs(blk); + +out: aio_context_release(aio_context); } -/* Assumes AioContext is held */ -static void qmp_bdrv_open_encrypted(BlockDriverState **pbs, - const char *filename, - int bdrv_flags, const char *format, - const char *password, Error **errp) +static void qmp_blockdev_insert_anon_medium(const char *device, + BlockDriverState *bs, Error **errp) { - Error *local_err = NULL; - QDict *options = NULL; - int ret; + BlockBackend *blk; + bool has_device; - if (format) { - options = qdict_new(); - qdict_put(options, "driver", qstring_from_str(format)); + blk = blk_by_name(device); + if (!blk) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", device); + return; } - ret = bdrv_open(pbs, filename, NULL, options, bdrv_flags, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); + /* For BBs without a device, we can exchange the BDS tree at will */ + has_device = blk_get_attached_dev(blk); + + if (has_device && !blk_dev_has_removable_media(blk)) { + error_setg(errp, "Device '%s' is not removable", device); + return; + } + + if (has_device && !blk_dev_is_tray_open(blk)) { + error_setg(errp, "Tray of device '%s' is not open", device); + return; + } + + if (blk_bs(blk)) { + error_setg(errp, "There already is a medium in device '%s'", device); return; } - bdrv_add_key(*pbs, password, errp); + blk_insert_bs(blk, bs); + + QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); } -void qmp_change_blockdev(const char *device, const char *filename, - const char *format, Error **errp) +void qmp_blockdev_insert_medium(const char *device, const char *node_name, + Error **errp) { - BlockBackend *blk; BlockDriverState *bs; - AioContext *aio_context; - int bdrv_flags; - bool new_bs; + + bs = bdrv_find_node(node_name); + if (!bs) { + error_setg(errp, "Node '%s' not found", node_name); + return; + } + + if (bs->blk) { + error_setg(errp, "Node '%s' is already in use by '%s'", node_name, + blk_name(bs->blk)); + return; + } + + qmp_blockdev_insert_anon_medium(device, bs, errp); +} + +void qmp_blockdev_change_medium(const char *device, const char *filename, + bool has_format, const char *format, + bool has_read_only, + BlockdevChangeReadOnlyMode read_only, + Error **errp) +{ + BlockBackend *blk; + BlockDriverState *medium_bs = NULL; + int bdrv_flags, ret; + QDict *options = NULL; Error *err = NULL; blk = blk_by_name(device); if (!blk) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found", device); - return; + goto fail; } - bs = blk_bs(blk); - new_bs = !bs; - aio_context = blk_get_aio_context(blk); - aio_context_acquire(aio_context); + if (blk_bs(blk)) { + blk_update_root_state(blk); + } + + bdrv_flags = blk_get_open_flags_from_root_state(blk); - eject_device(blk, 0, &err); + if (!has_read_only) { + read_only = BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN; + } + + switch (read_only) { + case BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN: + break; + + case BLOCKDEV_CHANGE_READ_ONLY_MODE_READ_ONLY: + bdrv_flags &= ~BDRV_O_RDWR; + break; + + case BLOCKDEV_CHANGE_READ_ONLY_MODE_READ_WRITE: + bdrv_flags |= BDRV_O_RDWR; + break; + + default: + abort(); + } + + if (has_format) { + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str(format)); + } + + assert(!medium_bs); + ret = bdrv_open(&medium_bs, filename, NULL, options, bdrv_flags, errp); + if (ret < 0) { + goto fail; + } + + blk_apply_root_state(blk, medium_bs); + + bdrv_add_key(medium_bs, NULL, &err); if (err) { error_propagate(errp, err); - goto out; + goto fail; } - bdrv_flags = blk_is_read_only(blk) ? 0 : BDRV_O_RDWR; - bdrv_flags |= blk_get_root_state(blk)->open_flags & ~BDRV_O_RDWR; + qmp_blockdev_open_tray(device, false, false, &err); + if (err) { + error_propagate(errp, err); + goto fail; + } - qmp_bdrv_open_encrypted(&bs, filename, bdrv_flags, format, NULL, &err); + qmp_blockdev_remove_medium(device, &err); if (err) { error_propagate(errp, err); - goto out; + goto fail; } - if (new_bs) { - blk_insert_bs(blk, bs); - /* Has been sent automatically by bdrv_open() if blk_bs(blk) was not - * NULL */ - blk_dev_change_media_cb(blk, true); + qmp_blockdev_insert_anon_medium(device, medium_bs, &err); + if (err) { + error_propagate(errp, err); + goto fail; } -out: - aio_context_release(aio_context); + qmp_blockdev_close_tray(device, errp); + +fail: + /* If the medium has been inserted, the device has its own reference, so + * ours must be relinquished; and if it has not been inserted successfully, + * the reference must be relinquished anyway */ + bdrv_unref(medium_bs); } /* throttling disk I/O limits */ @@ -2160,14 +2624,14 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, if (throttle_enabled(&cfg)) { /* Enable I/O limits if they're not enabled yet, otherwise * just update the throttling group. */ - if (!bs->io_limits_enabled) { + if (!bs->throttle_state) { bdrv_io_limits_enable(bs, has_group ? group : device); } else if (has_group) { bdrv_io_limits_update_group(bs, group); } /* Set the new throttling configuration */ bdrv_set_io_limits(bs, &cfg); - } else if (bs->io_limits_enabled) { + } else if (bs->throttle_state) { /* If all throttling settings are set to 0, disable I/O limits */ bdrv_io_limits_disable(bs); } @@ -2266,7 +2730,7 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name, goto out; } - bdrv_clear_dirty_bitmap(bitmap); + bdrv_clear_dirty_bitmap(bitmap, NULL); out: aio_context_release(aio_context); @@ -2409,8 +2873,6 @@ static void block_job_cb(void *opaque, int ret) } else { block_job_event_completed(bs->job, msg); } - - bdrv_put_ref_bh_schedule(bs); } void qmp_block_stream(const char *device, @@ -2591,15 +3053,17 @@ out: aio_context_release(aio_context); } -void qmp_drive_backup(const char *device, const char *target, - bool has_format, const char *format, - enum MirrorSyncMode sync, - bool has_mode, enum NewImageMode mode, - bool has_speed, int64_t speed, - bool has_bitmap, const char *bitmap, - bool has_on_source_error, BlockdevOnError on_source_error, - bool has_on_target_error, BlockdevOnError on_target_error, - Error **errp) +static void do_drive_backup(const char *device, const char *target, + bool has_format, const char *format, + enum MirrorSyncMode sync, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, + bool has_bitmap, const char *bitmap, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + BlockJobTxn *txn, Error **errp) { BlockBackend *blk; BlockDriverState *bs; @@ -2708,13 +3172,14 @@ void qmp_drive_backup(const char *device, const char *target, bmap = bdrv_find_dirty_bitmap(bs, bitmap); if (!bmap) { error_setg(errp, "Bitmap '%s' could not be found", bitmap); + bdrv_unref(target_bs); goto out; } } backup_start(bs, target_bs, speed, sync, bmap, on_source_error, on_target_error, - block_job_cb, bs, &local_err); + block_job_cb, bs, txn, &local_err); if (local_err != NULL) { bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -2725,19 +3190,37 @@ out: aio_context_release(aio_context); } +void qmp_drive_backup(const char *device, const char *target, + bool has_format, const char *format, + enum MirrorSyncMode sync, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, + bool has_bitmap, const char *bitmap, + bool has_on_source_error, BlockdevOnError on_source_error, + bool has_on_target_error, BlockdevOnError on_target_error, + Error **errp) +{ + return do_drive_backup(device, target, has_format, format, sync, + has_mode, mode, has_speed, speed, + has_bitmap, bitmap, + has_on_source_error, on_source_error, + has_on_target_error, on_target_error, + NULL, errp); +} + BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) { return bdrv_named_nodes_list(errp); } -void qmp_blockdev_backup(const char *device, const char *target, +void do_blockdev_backup(const char *device, const char *target, enum MirrorSyncMode sync, bool has_speed, int64_t speed, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, - Error **errp) + BlockJobTxn *txn, Error **errp) { BlockBackend *blk, *target_blk; BlockDriverState *bs; @@ -2785,7 +3268,7 @@ void qmp_blockdev_backup(const char *device, const char *target, bdrv_ref(target_bs); bdrv_set_aio_context(target_bs, aio_context); backup_start(bs, target_bs, speed, sync, NULL, on_source_error, - on_target_error, block_job_cb, bs, &local_err); + on_target_error, block_job_cb, bs, txn, &local_err); if (local_err != NULL) { bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -2794,6 +3277,21 @@ out: aio_context_release(aio_context); } +void qmp_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + Error **errp) +{ + do_blockdev_backup(device, target, sync, has_speed, speed, + has_on_source_error, on_source_error, + has_on_target_error, on_target_error, + NULL, errp); +} + void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, bool has_node_name, const char *node_name, @@ -3273,6 +3771,72 @@ fail: qmp_output_visitor_cleanup(ov); } +void qmp_x_blockdev_del(bool has_id, const char *id, + bool has_node_name, const char *node_name, Error **errp) +{ + AioContext *aio_context; + BlockBackend *blk; + BlockDriverState *bs; + + if (has_id && has_node_name) { + error_setg(errp, "Only one of id and node-name must be specified"); + return; + } else if (!has_id && !has_node_name) { + error_setg(errp, "No block device specified"); + return; + } + + if (has_id) { + blk = blk_by_name(id); + if (!blk) { + error_setg(errp, "Cannot find block backend %s", id); + return; + } + if (blk_get_refcnt(blk) > 1) { + error_setg(errp, "Block backend %s is in use", id); + return; + } + bs = blk_bs(blk); + aio_context = blk_get_aio_context(blk); + } else { + bs = bdrv_find_node(node_name); + if (!bs) { + error_setg(errp, "Cannot find node %s", node_name); + return; + } + blk = bs->blk; + if (blk) { + error_setg(errp, "Node %s is in use by %s", + node_name, blk_name(blk)); + return; + } + aio_context = bdrv_get_aio_context(bs); + } + + aio_context_acquire(aio_context); + + if (bs) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) { + goto out; + } + + if (bs->refcnt > 1 || !QLIST_EMPTY(&bs->parents)) { + error_setg(errp, "Block device %s is in use", + bdrv_get_device_or_node_name(bs)); + goto out; + } + } + + if (blk) { + blk_unref(blk); + } else { + bdrv_unref(bs); + } + +out: + aio_context_release(aio_context); +} + BlockJobInfoList *qmp_query_block_jobs(Error **errp) { BlockJobInfoList *head = NULL, **p_next = &head; @@ -3404,6 +3968,16 @@ QemuOptsList qemu_common_drive_opts = { .name = "detect-zeroes", .type = QEMU_OPT_STRING, .help = "try to optimize zero writes (off, on, unmap)", + },{ + .name = "stats-account-invalid", + .type = QEMU_OPT_BOOL, + .help = "whether to account for invalid I/O operations " + "in the statistics", + },{ + .name = "stats-account-failed", + .type = QEMU_OPT_BOOL, + .help = "whether to account for failed I/O operations " + "in the statistics", }, { /* end of list */ } }, diff --git a/blockjob.c b/blockjob.c index c02fe598b8..80adb9d52a 100644 --- a/blockjob.c +++ b/blockjob.c @@ -37,6 +37,19 @@ #include "qemu/timer.h" #include "qapi-event.h" +/* Transactional group of block jobs */ +struct BlockJobTxn { + + /* Is this txn being cancelled? */ + bool aborting; + + /* List of jobs */ + QLIST_HEAD(, BlockJob) jobs; + + /* Reference count */ + int refcnt; +}; + void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, int64_t speed, BlockCompletionFunc *cb, void *opaque, Error **errp) @@ -60,6 +73,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, job->cb = cb; job->opaque = opaque; job->busy = true; + job->refcnt = 1; bs->job = job; /* Only set speed when necessary to avoid NotSupported error */ @@ -68,7 +82,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, block_job_set_speed(job, speed, &local_err); if (local_err) { - block_job_release(bs); + block_job_unref(job); error_propagate(errp, local_err); return NULL; } @@ -76,15 +90,101 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, return job; } -void block_job_release(BlockDriverState *bs) +void block_job_ref(BlockJob *job) { - BlockJob *job = bs->job; + ++job->refcnt; +} - bs->job = NULL; - bdrv_op_unblock_all(bs, job->blocker); - error_free(job->blocker); - g_free(job->id); - g_free(job); +void block_job_unref(BlockJob *job) +{ + if (--job->refcnt == 0) { + job->bs->job = NULL; + bdrv_op_unblock_all(job->bs, job->blocker); + bdrv_unref(job->bs); + error_free(job->blocker); + g_free(job->id); + g_free(job); + } +} + +static void block_job_completed_single(BlockJob *job) +{ + if (!job->ret) { + if (job->driver->commit) { + job->driver->commit(job); + } + } else { + if (job->driver->abort) { + job->driver->abort(job); + } + } + job->cb(job->opaque, job->ret); + if (job->txn) { + block_job_txn_unref(job->txn); + } + block_job_unref(job); +} + +static void block_job_completed_txn_abort(BlockJob *job) +{ + AioContext *ctx; + BlockJobTxn *txn = job->txn; + BlockJob *other_job, *next; + + if (txn->aborting) { + /* + * We are cancelled by another job, which will handle everything. + */ + return; + } + txn->aborting = true; + /* We are the first failed job. Cancel other jobs. */ + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + ctx = bdrv_get_aio_context(other_job->bs); + aio_context_acquire(ctx); + } + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + if (other_job == job || other_job->completed) { + /* Other jobs are "effectively" cancelled by us, set the status for + * them; this job, however, may or may not be cancelled, depending + * on the caller, so leave it. */ + if (other_job != job) { + other_job->cancelled = true; + } + continue; + } + block_job_cancel_sync(other_job); + assert(other_job->completed); + } + QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + ctx = bdrv_get_aio_context(other_job->bs); + block_job_completed_single(other_job); + aio_context_release(ctx); + } +} + +static void block_job_completed_txn_success(BlockJob *job) +{ + AioContext *ctx; + BlockJobTxn *txn = job->txn; + BlockJob *other_job, *next; + /* + * Successful completion, see if there are other running jobs in this + * txn. + */ + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + if (!other_job->completed) { + return; + } + } + /* We are the last completed job, commit the transaction. */ + QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + ctx = bdrv_get_aio_context(other_job->bs); + aio_context_acquire(ctx); + assert(other_job->ret == 0); + block_job_completed_single(other_job); + aio_context_release(ctx); + } } void block_job_completed(BlockJob *job, int ret) @@ -92,8 +192,16 @@ void block_job_completed(BlockJob *job, int ret) BlockDriverState *bs = job->bs; assert(bs->job == job); - job->cb(job->opaque, ret); - block_job_release(bs); + assert(!job->completed); + job->completed = true; + job->ret = ret; + if (!job->txn) { + block_job_completed_single(job); + } else if (ret < 0 || block_job_is_cancelled(job)) { + block_job_completed_txn_abort(job); + } else { + block_job_completed_txn_success(job); + } } void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -178,43 +286,29 @@ struct BlockFinishData { int ret; }; -static void block_job_finish_cb(void *opaque, int ret) -{ - struct BlockFinishData *data = opaque; - - data->cancelled = block_job_is_cancelled(data->job); - data->ret = ret; - data->cb(data->opaque, ret); -} - static int block_job_finish_sync(BlockJob *job, void (*finish)(BlockJob *, Error **errp), Error **errp) { - struct BlockFinishData data; BlockDriverState *bs = job->bs; Error *local_err = NULL; + int ret; assert(bs->job == job); - /* Set up our own callback to store the result and chain to - * the original callback. - */ - data.job = job; - data.cb = job->cb; - data.opaque = job->opaque; - data.ret = -EINPROGRESS; - job->cb = block_job_finish_cb; - job->opaque = &data; + block_job_ref(job); finish(job, &local_err); if (local_err) { error_propagate(errp, local_err); + block_job_unref(job); return -EBUSY; } - while (data.ret == -EINPROGRESS) { + while (!job->completed) { aio_poll(bdrv_get_aio_context(bs), true); } - return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; + ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; + block_job_unref(job); + return ret; } /* A wrapper around block_job_cancel() taking an Error ** parameter so it may be @@ -406,3 +500,36 @@ void block_job_defer_to_main_loop(BlockJob *job, qemu_bh_schedule(data->bh); } + +BlockJobTxn *block_job_txn_new(void) +{ + BlockJobTxn *txn = g_new0(BlockJobTxn, 1); + QLIST_INIT(&txn->jobs); + txn->refcnt = 1; + return txn; +} + +static void block_job_txn_ref(BlockJobTxn *txn) +{ + txn->refcnt++; +} + +void block_job_txn_unref(BlockJobTxn *txn) +{ + if (txn && --txn->refcnt == 0) { + g_free(txn); + } +} + +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) +{ + if (!txn) { + return; + } + + assert(!job->txn); + job->txn = txn; + + QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); + block_job_txn_ref(txn); +} diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c index 351aab12e7..59a7bdf0cc 100644 --- a/bsd-user/elfload.c +++ b/bsd-user/elfload.c @@ -740,8 +740,7 @@ static void padzero(abi_ulong elf_bss, abi_ulong last_bss) size must be known */ if (qemu_real_host_page_size < qemu_host_page_size) { abi_ulong end_addr, end_addr1; - end_addr1 = (elf_bss + qemu_real_host_page_size - 1) & - ~(qemu_real_host_page_size - 1); + end_addr1 = REAL_HOST_PAGE_ALIGN(elf_bss); end_addr = HOST_PAGE_ALIGN(elf_bss); if (end_addr1 < end_addr) { mmap((void *)g2h(end_addr1), end_addr - end_addr1, @@ -8,6 +8,9 @@ CLICOLOR_FORCE= GREP_OPTIONS= unset CLICOLOR_FORCE GREP_OPTIONS +# Don't allow CCACHE, if present, to use cached results of compile tests! +export CCACHE_RECACHE=yes + # Temporary directory used for files created while # configure runs. Since it is in the build directory # we can safely blow away any previous version of it @@ -261,6 +264,7 @@ rdma="" gprof="no" debug_tcg="no" debug="no" +fortify_source="" strip_opt="yes" tcg_interpreter="no" bigendian="no" @@ -723,6 +727,8 @@ if test "$mingw32" = "yes" ; then QEMU_CFLAGS="-DWIN32_LEAN_AND_MEAN -DWINVER=0x501 $QEMU_CFLAGS" # enable C99/POSIX format strings (needs mingw32-runtime 3.15 or later) QEMU_CFLAGS="-D__USE_MINGW_ANSI_STDIO=1 $QEMU_CFLAGS" + # MinGW needs -mthreads for TLS and macro _MT. + QEMU_CFLAGS="-mthreads $QEMU_CFLAGS" LIBS="-lwinmm -lws2_32 -liphlpapi $LIBS" write_c_skeleton; if compile_prog "" "-liberty" ; then @@ -787,6 +793,9 @@ for opt do --enable-modules) modules="yes" ;; + --disable-modules) + modules="no" + ;; --cpu=*) ;; --target-list=*) target_list="$optarg" @@ -876,6 +885,7 @@ for opt do debug_tcg="yes" debug="yes" strip_opt="no" + fortify_source="no" ;; --enable-sparse) sparse="yes" ;; @@ -1340,7 +1350,6 @@ disabled with --disable-FEATURE, default is enabled if available: vte vte support for the gtk UI curses curses UI vnc VNC UI support - vnc-tls TLS encryption for VNC server vnc-sasl SASL encryption for VNC server vnc-jpeg JPEG lossy compression for VNC server vnc-png PNG compression for VNC server @@ -1419,6 +1428,9 @@ if compile_object ; then else error_exit "\"$cc\" either does not exist or does not work" fi +if ! compile_prog ; then + error_exit "\"$cc\" cannot build an executable (is your linker broken?)" +fi # Check that the C++ compiler exists and works with the C compiler if has $cxx; then @@ -1479,6 +1491,16 @@ for flag in $gcc_flags; do done if test "$stack_protector" != "no"; then + cat > $TMPC << EOF +int main(int argc, char *argv[]) +{ + char arr[64], *p = arr, *c = argv[0]; + while (*c) { + *p++ = *c++; + } + return 0; +} +EOF gcc_flags="-fstack-protector-strong -fstack-protector-all" sp_on=0 for flag in $gcc_flags; do @@ -1881,16 +1903,34 @@ fi # libseccomp check if test "$seccomp" != "no" ; then - if test "$cpu" = "i386" || test "$cpu" = "x86_64" && - $pkg_config --atleast-version=2.1.1 libseccomp; then + case "$cpu" in + i386|x86_64) + libseccomp_minver="2.1.0" + ;; + arm|aarch64) + libseccomp_minver="2.2.3" + ;; + *) + libseccomp_minver="" + ;; + esac + + if test "$libseccomp_minver" != "" && + $pkg_config --atleast-version=$libseccomp_minver libseccomp ; then libs_softmmu="$libs_softmmu `$pkg_config --libs libseccomp`" QEMU_CFLAGS="$QEMU_CFLAGS `$pkg_config --cflags libseccomp`" - seccomp="yes" + seccomp="yes" else - if test "$seccomp" = "yes"; then - feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.1" - fi - seccomp="no" + if test "$seccomp" = "yes" ; then + if test "$libseccomp_minver" != "" ; then + feature_not_found "libseccomp" \ + "Install libseccomp devel >= $libseccomp_minver" + else + feature_not_found "libseccomp" \ + "libseccomp is not supported for host cpu $cpu" + fi + fi + seccomp="no" fi fi ########################################## @@ -1921,6 +1961,23 @@ EOF elif cat > $TMPC <<EOF && #include <xenctrl.h> +#include <stdint.h> +int main(void) { + xc_interface *xc = NULL; + xen_domain_handle_t handle; + xc_domain_create(xc, 0, handle, 0, NULL, NULL); + return 0; +} +EOF + compile_prog "" "$xen_libs" + then + xen_ctrl_version=470 + xen=yes + + # Xen 4.6 + elif + cat > $TMPC <<EOF && +#include <xenctrl.h> #include <xenstore.h> #include <stdint.h> #include <xen/hvm/hvm_info_table.h> @@ -3286,25 +3343,11 @@ fi libs_softmmu="$libs_softmmu $fdt_libs" ########################################## -# opengl probe (for sdl2, milkymist-tmu2) - -# GLX probe, used by milkymist-tmu2 -# this is temporary, code will be switched to egl mid-term. -cat > $TMPC << EOF -#include <X11/Xlib.h> -#include <GL/gl.h> -#include <GL/glx.h> -int main(void) { glBegin(0); glXQueryVersion(0,0,0); return 0; } -EOF -if compile_prog "" "-lGL -lX11" ; then - have_glx=yes -else - have_glx=no -fi +# opengl probe (for sdl2, gtk, milkymist-tmu2) if test "$opengl" != "no" ; then - opengl_pkgs="gl glesv2 epoxy egl" - if $pkg_config $opengl_pkgs x11 && test "$have_glx" = "yes"; then + opengl_pkgs="epoxy" + if $pkg_config $opengl_pkgs x11; then opengl_cflags="$($pkg_config --cflags $opengl_pkgs) $x11_cflags" opengl_libs="$($pkg_config --libs $opengl_pkgs) $x11_libs" opengl=yes @@ -4412,6 +4455,7 @@ fi # check if ccache is interfering with # semantic analysis of macros +unset CCACHE_CPP2 ccache_cpp2=no cat > $TMPC << EOF static const int Z = 1; @@ -4435,6 +4479,20 @@ if ! compile_object "-Werror"; then ccache_cpp2=yes fi +################################################# +# clang does not support glibc + FORTIFY_SOURCE. + +if test "$fortify_source" != "no"; then + if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1 ; then + fortify_source="no"; + elif test -n "$cxx" && + echo | $cxx -dM -E - | grep __clang__ >/dev/null 2>&1 ; then + fortify_source="no"; + else + fortify_source="yes" + fi +fi + ########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -4442,8 +4500,10 @@ fi if test "$gcov" = "yes" ; then CFLAGS="-fprofile-arcs -ftest-coverage -g $CFLAGS" LDFLAGS="-fprofile-arcs -ftest-coverage $LDFLAGS" -elif test "$debug" = "no" ; then +elif test "$fortify_source" = "yes" ; then CFLAGS="-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $CFLAGS" +elif test "$debug" = "no"; then + CFLAGS="-O2 $CFLAGS" fi ########################################## @@ -5653,6 +5713,7 @@ case "$target_name" in echo "CONFIG_KVM=y" >> $config_target_mak if test "$vhost_net" = "yes" ; then echo "CONFIG_VHOST_NET=y" >> $config_target_mak + echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak fi fi esac diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c index 5e5239ce45..d9e26b0574 100644 --- a/contrib/ivshmem-server/ivshmem-server.c +++ b/contrib/ivshmem-server/ivshmem-server.c @@ -168,7 +168,9 @@ ivshmem_server_handle_new_conn(IvshmemServer *server) } if (i == G_MAXUINT16) { IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n"); - goto fail; + close(newfd); + g_free(peer); + return -1; } peer->id = server->cur_id++; diff --git a/cpu-exec.c b/cpu-exec.c index 7eef0830fe..c88d0ffdcd 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -30,6 +30,7 @@ #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) #include "hw/i386/apic.h" #endif +#include "sysemu/replay.h" /* -icount align implementation. */ @@ -184,7 +185,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr) /* Execute the code without caching the generated code. An interpreter could be used if available. */ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, - TranslationBlock *orig_tb) + TranslationBlock *orig_tb, bool ignore_icount) { TranslationBlock *tb; @@ -194,7 +195,8 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, max_cycles = CF_COUNT_MASK; tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, - max_cycles | CF_NOCACHE); + max_cycles | CF_NOCACHE + | (ignore_icount ? CF_IGNORE_ICOUNT : 0)); tb->orig_tb = tcg_ctx.tb_ctx.tb_invalidated_flag ? NULL : orig_tb; cpu->current_tb = tb; /* execute the generated code */ @@ -345,21 +347,25 @@ int cpu_exec(CPUState *cpu) uintptr_t next_tb; SyncClocks sc; + /* replay_interrupt may need current_cpu */ + current_cpu = cpu; + if (cpu->halted) { #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + if ((cpu->interrupt_request & CPU_INTERRUPT_POLL) + && replay_interrupt()) { apic_poll_irq(x86_cpu->apic_state); cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); } #endif if (!cpu_has_work(cpu)) { + current_cpu = NULL; return EXCP_HALTED; } cpu->halted = 0; } - current_cpu = cpu; atomic_mb_set(&tcg_current_cpu, cpu); rcu_read_lock(); @@ -401,10 +407,22 @@ int cpu_exec(CPUState *cpu) cpu->exception_index = -1; break; #else - cc->do_interrupt(cpu); - cpu->exception_index = -1; + if (replay_exception()) { + cc->do_interrupt(cpu); + cpu->exception_index = -1; + } else if (!replay_has_interrupt()) { + /* give a chance to iothread in replay mode */ + ret = EXCP_INTERRUPT; + break; + } #endif } + } else if (replay_has_exception() + && cpu->icount_decr.u16.low + cpu->icount_extra == 0) { + /* try to cause an exception pending in the log */ + cpu_exec_nocache(cpu, 1, tb_find_fast(cpu), true); + ret = -1; + break; } next_tb = 0; /* force lookup of first TB */ @@ -420,30 +438,40 @@ int cpu_exec(CPUState *cpu) cpu->exception_index = EXCP_DEBUG; cpu_loop_exit(cpu); } - if (interrupt_request & CPU_INTERRUPT_HALT) { + if (replay_mode == REPLAY_MODE_PLAY + && !replay_has_interrupt()) { + /* Do nothing */ + } else if (interrupt_request & CPU_INTERRUPT_HALT) { + replay_interrupt(); cpu->interrupt_request &= ~CPU_INTERRUPT_HALT; cpu->halted = 1; cpu->exception_index = EXCP_HLT; cpu_loop_exit(cpu); } #if defined(TARGET_I386) - if (interrupt_request & CPU_INTERRUPT_INIT) { + else if (interrupt_request & CPU_INTERRUPT_INIT) { + replay_interrupt(); cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0); do_cpu_init(x86_cpu); cpu->exception_index = EXCP_HALTED; cpu_loop_exit(cpu); } #else - if (interrupt_request & CPU_INTERRUPT_RESET) { + else if (interrupt_request & CPU_INTERRUPT_RESET) { + replay_interrupt(); cpu_reset(cpu); + cpu_loop_exit(cpu); } #endif /* The target hook has 3 exit conditions: False when the interrupt isn't processed, True when it is, and we should restart on a new TB, and via longjmp via cpu_loop_exit. */ - if (cc->cpu_exec_interrupt(cpu, interrupt_request)) { - next_tb = 0; + else { + replay_interrupt(); + if (cc->cpu_exec_interrupt(cpu, interrupt_request)) { + next_tb = 0; + } } /* Don't use the cached interrupt_request value, do_interrupt may have updated the EXITTB flag. */ @@ -454,7 +482,8 @@ int cpu_exec(CPUState *cpu) next_tb = 0; } } - if (unlikely(cpu->exit_request)) { + if (unlikely(cpu->exit_request + || replay_has_interrupt())) { cpu->exit_request = 0; cpu->exception_index = EXCP_INTERRUPT; cpu_loop_exit(cpu); @@ -519,7 +548,7 @@ int cpu_exec(CPUState *cpu) if (insns_left > 0) { /* Execute remaining instructions. */ tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK); - cpu_exec_nocache(cpu, insns_left, tb); + cpu_exec_nocache(cpu, insns_left, tb, false); align_clocks(&sc, cpu); } cpu->exception_index = EXCP_INTERRUPT; @@ -539,15 +568,27 @@ int cpu_exec(CPUState *cpu) only be set by a memory fault) */ } /* for(;;) */ } else { - /* Reload env after longjmp - the compiler may have smashed all - * local variables as longjmp is marked 'noreturn'. */ +#if defined(__clang__) || !QEMU_GNUC_PREREQ(4, 6) + /* Some compilers wrongly smash all local variables after + * siglongjmp. There were bug reports for gcc 4.5.0 and clang. + * Reload essential local variables here for those compilers. + * Newer versions of gcc would complain about this code (-Wclobbered). */ cpu = current_cpu; cc = CPU_GET_CLASS(cpu); - cpu->can_do_io = 1; #ifdef TARGET_I386 x86_cpu = X86_CPU(cpu); env = &x86_cpu->env; #endif +#else /* buggy compiler */ + /* Assert that the compiler does not smash local variables. */ + g_assert(cpu == current_cpu); + g_assert(cc == CPU_GET_CLASS(cpu)); +#ifdef TARGET_I386 + g_assert(x86_cpu == X86_CPU(cpu)); + g_assert(env == &x86_cpu->env); +#endif +#endif /* buggy compiler */ + cpu->can_do_io = 1; tb_lock_reset(); } } /* for(;;) */ @@ -42,6 +42,7 @@ #include "qemu/seqlock.h" #include "qapi-event.h" #include "hw/nmi.h" +#include "sysemu/replay.h" #ifndef _WIN32 #include "qemu/compatfd.h" @@ -334,7 +335,7 @@ static int64_t qemu_icount_round(int64_t count) return (count + (1 << icount_time_shift) - 1) >> icount_time_shift; } -static void icount_warp_rt(void *opaque) +static void icount_warp_rt(void) { /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start * changes from -1 to another value, so the race here is okay. @@ -345,7 +346,8 @@ static void icount_warp_rt(void *opaque) seqlock_write_lock(&timers_state.vm_clock_seqlock); if (runstate_is_running()) { - int64_t clock = cpu_get_clock_locked(); + int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, + cpu_get_clock_locked()); int64_t warp_delta; warp_delta = clock - vm_clock_warp_start; @@ -368,6 +370,11 @@ static void icount_warp_rt(void *opaque) } } +static void icount_dummy_timer(void *opaque) +{ + (void)opaque; +} + void qtest_clock_warp(int64_t dest) { int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); @@ -403,6 +410,18 @@ void qemu_clock_warp(QEMUClockType type) return; } + /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers + * do not fire, so computing the deadline does not make sense. + */ + if (!runstate_is_running()) { + return; + } + + /* warp clock deterministically in record/replay mode */ + if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP)) { + return; + } + if (icount_sleep) { /* * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now. @@ -412,7 +431,7 @@ void qemu_clock_warp(QEMUClockType type) * the CPU starts running, in case the CPU is woken by an event other * than the earliest QEMU_CLOCK_VIRTUAL timer. */ - icount_warp_rt(NULL); + icount_warp_rt(); timer_del(icount_warp_timer); } if (!all_cpu_threads_idle()) { @@ -605,7 +624,7 @@ void configure_icount(QemuOpts *opts, Error **errp) icount_sleep = qemu_opt_get_bool(opts, "sleep", true); if (icount_sleep) { icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, - icount_warp_rt, NULL); + icount_dummy_timer, NULL); } icount_align_option = qemu_opt_get_bool(opts, "align", false); @@ -694,15 +713,6 @@ void cpu_synchronize_all_post_init(void) } } -void cpu_clean_all_dirty(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_clean_state(cpu); - } -} - static int do_vm_stop(RunState state) { int ret = 0; @@ -1405,12 +1415,36 @@ int vm_stop_force_state(RunState state) return vm_stop(state); } else { runstate_set(state); + + bdrv_drain_all(); /* Make sure to return an error if the flush in a previous vm_stop() * failed. */ return bdrv_flush_all(); } } +static int64_t tcg_get_icount_limit(void) +{ + int64_t deadline; + + if (replay_mode != REPLAY_MODE_PLAY) { + deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); + + /* Maintain prior (possibly buggy) behaviour where if no deadline + * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than + * INT32_MAX nanoseconds ahead, we still use INT32_MAX + * nanoseconds. + */ + if ((deadline < 0) || (deadline > INT32_MAX)) { + deadline = INT32_MAX; + } + + return qemu_icount_round(deadline); + } else { + return replay_get_instructions(); + } +} + static int tcg_cpu_exec(CPUState *cpu) { int ret; @@ -1423,24 +1457,12 @@ static int tcg_cpu_exec(CPUState *cpu) #endif if (use_icount) { int64_t count; - int64_t deadline; int decr; timers_state.qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra); cpu->icount_decr.u16.low = 0; cpu->icount_extra = 0; - deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); - - /* Maintain prior (possibly buggy) behaviour where if no deadline - * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than - * INT32_MAX nanoseconds ahead, we still use INT32_MAX - * nanoseconds. - */ - if ((deadline < 0) || (deadline > INT32_MAX)) { - deadline = INT32_MAX; - } - - count = qemu_icount_round(deadline); + count = tcg_get_icount_limit(); timers_state.qemu_icount += count; decr = (count > 0xffff) ? 0xffff : count; count -= decr; @@ -1458,6 +1480,7 @@ static int tcg_cpu_exec(CPUState *cpu) + cpu->icount_extra); cpu->icount_decr.u32 = 0; cpu->icount_extra = 0; + replay_account_executed_instructions(); } return ret; } diff --git a/crypto/tlscreds.c b/crypto/tlscreds.c index 5ec982c6ee..e7d9c1cfac 100644 --- a/crypto/tlscreds.c +++ b/crypto/tlscreds.c @@ -123,10 +123,10 @@ qcrypto_tls_creds_get_path(QCryptoTLSCreds *creds, goto cleanup; } - trace_qcrypto_tls_creds_get_path(creds, filename, - *cred ? *cred : "<none>"); ret = 0; cleanup: + trace_qcrypto_tls_creds_get_path(creds, filename, + *cred ? *cred : "<none>"); return ret; } diff --git a/crypto/tlscredsx509.c b/crypto/tlscredsx509.c index dc46bc40f7..26f18cbb4a 100644 --- a/crypto/tlscredsx509.c +++ b/crypto/tlscredsx509.c @@ -255,6 +255,7 @@ qcrypto_tls_creds_check_cert_key_purpose(QCryptoTLSCredsX509 *creds, } g_free(buffer); + buffer = NULL; } if (isServer) { @@ -485,7 +486,8 @@ qcrypto_tls_creds_x509_sanity_check(QCryptoTLSCredsX509 *creds, int ret = -1; memset(cacerts, 0, sizeof(cacerts)); - if (access(certFile, R_OK) == 0) { + if (certFile && + access(certFile, R_OK) == 0) { cert = qcrypto_tls_creds_load_cert(creds, certFile, isServer, errp); @@ -654,6 +656,10 @@ qcrypto_tls_creds_x509_unload(QCryptoTLSCredsX509 *creds) gnutls_certificate_free_credentials(creds->data); creds->data = NULL; } + if (creds->parent_obj.dh_params) { + gnutls_dh_params_deinit(creds->parent_obj.dh_params); + creds->parent_obj.dh_params = NULL; + } } diff --git a/crypto/tlssession.c b/crypto/tlssession.c index ffc5c47949..373552942c 100644 --- a/crypto/tlssession.c +++ b/crypto/tlssession.c @@ -304,9 +304,9 @@ qcrypto_tls_session_check_certificate(QCryptoTLSSession *session, allow = qemu_acl_party_is_allowed(acl, session->peername); - error_setg(errp, "TLS x509 ACL check for %s is %s", - session->peername, allow ? "allowed" : "denied"); if (!allow) { + error_setg(errp, "TLS x509 ACL check for %s is denied", + session->peername); goto error; } } diff --git a/default-configs/aarch64-linux-user.mak b/default-configs/aarch64-linux-user.mak index 3df7de5b8f..0a5b08a007 100644 --- a/default-configs/aarch64-linux-user.mak +++ b/default-configs/aarch64-linux-user.mak @@ -1,3 +1 @@ # Default configuration for aarch64-linux-user - -CONFIG_GDBSTUB_XML=y diff --git a/disas/arm.c b/disas/arm.c index 6165246539..7a7354b76a 100644 --- a/disas/arm.c +++ b/disas/arm.c @@ -1779,7 +1779,7 @@ print_insn_coprocessor (bfd_vma pc, struct disassemble_info *info, long given, /* Is ``imm'' a negative number? */ if (imm & 0x40) - imm |= (-1 << 7); + imm |= (~0u << 7); func (stream, "%d", imm); } diff --git a/docs/bitmaps.md b/docs/bitmaps.md index fa87f077fe..a2e8d51163 100644 --- a/docs/bitmaps.md +++ b/docs/bitmaps.md @@ -19,12 +19,20 @@ which is included at the end of this document. * A dirty bitmap's name is unique to the node, but bitmaps attached to different nodes can share the same name. +* Dirty bitmaps created for internal use by QEMU may be anonymous and have no + name, but any user-created bitmaps may not be. There can be any number of + anonymous bitmaps per node. + +* The name of a user-created bitmap must not be empty (""). + ## Bitmap Modes * A Bitmap can be "frozen," which means that it is currently in-use by a backup operation and cannot be deleted, renamed, written to, reset, etc. +* The normal operating mode for a bitmap is "active." + ## Basic QMP Usage ### Supported Commands ### @@ -97,11 +105,7 @@ which is included at the end of this document. } ``` -## Transactions (Not yet implemented) - -* Transactional commands are forthcoming in a future version, - and are not yet available for use. This section serves as - documentation of intent for their design and usage. +## Transactions ### Justification @@ -323,6 +327,155 @@ full backup as a backing image. "event": "BLOCK_JOB_COMPLETED" } ``` +### Partial Transactional Failures + +* Sometimes, a transaction will succeed in launching and return success, + but then later the backup jobs themselves may fail. It is possible that + a management application may have to deal with a partial backup failure + after a successful transaction. + +* If multiple backup jobs are specified in a single transaction, when one of + them fails, it will not interact with the other backup jobs in any way. + +* The job(s) that succeeded will clear the dirty bitmap associated with the + operation, but the job(s) that failed will not. It is not "safe" to delete + any incremental backups that were created successfully in this scenario, + even though others failed. + +#### Example + +* QMP example highlighting two backup jobs: + + ```json + { "execute": "transaction", + "arguments": { + "actions": [ + { "type": "drive-backup", + "data": { "device": "drive0", "bitmap": "bitmap0", + "format": "qcow2", "mode": "existing", + "sync": "incremental", "target": "d0-incr-1.qcow2" } }, + { "type": "drive-backup", + "data": { "device": "drive1", "bitmap": "bitmap1", + "format": "qcow2", "mode": "existing", + "sync": "incremental", "target": "d1-incr-1.qcow2" } }, + ] + } + } + ``` + +* QMP example response, highlighting one success and one failure: + * Acknowledgement that the Transaction was accepted and jobs were launched: + ```json + { "return": {} } + ``` + + * Later, QEMU sends notice that the first job was completed: + ```json + { "timestamp": { "seconds": 1447192343, "microseconds": 615698 }, + "data": { "device": "drive0", "type": "backup", + "speed": 0, "len": 67108864, "offset": 67108864 }, + "event": "BLOCK_JOB_COMPLETED" + } + ``` + + * Later yet, QEMU sends notice that the second job has failed: + ```json + { "timestamp": { "seconds": 1447192399, "microseconds": 683015 }, + "data": { "device": "drive1", "action": "report", + "operation": "read" }, + "event": "BLOCK_JOB_ERROR" } + ``` + + ```json + { "timestamp": { "seconds": 1447192399, "microseconds": 685853 }, + "data": { "speed": 0, "offset": 0, "len": 67108864, + "error": "Input/output error", + "device": "drive1", "type": "backup" }, + "event": "BLOCK_JOB_COMPLETED" } + +* In the above example, "d0-incr-1.qcow2" is valid and must be kept, + but "d1-incr-1.qcow2" is invalid and should be deleted. If a VM-wide + incremental backup of all drives at a point-in-time is to be made, + new backups for both drives will need to be made, taking into account + that a new incremental backup for drive0 needs to be based on top of + "d0-incr-1.qcow2." + +### Grouped Completion Mode + +* While jobs launched by transactions normally complete or fail on their own, + it is possible to instruct them to complete or fail together as a group. + +* QMP transactions take an optional properties structure that can affect + the semantics of the transaction. + +* The "completion-mode" transaction property can be either "individual" + which is the default, legacy behavior described above, or "grouped," + a new behavior detailed below. + +* Delayed Completion: In grouped completion mode, no jobs will report + success until all jobs are ready to report success. + +* Grouped failure: If any job fails in grouped completion mode, all remaining + jobs will be cancelled. Any incremental backups will restore their dirty + bitmap objects as if no backup command was ever issued. + + * Regardless of if QEMU reports a particular incremental backup job as + CANCELLED or as an ERROR, the in-memory bitmap will be restored. + +#### Example + +* Here's the same example scenario from above with the new property: + + ```json + { "execute": "transaction", + "arguments": { + "actions": [ + { "type": "drive-backup", + "data": { "device": "drive0", "bitmap": "bitmap0", + "format": "qcow2", "mode": "existing", + "sync": "incremental", "target": "d0-incr-1.qcow2" } }, + { "type": "drive-backup", + "data": { "device": "drive1", "bitmap": "bitmap1", + "format": "qcow2", "mode": "existing", + "sync": "incremental", "target": "d1-incr-1.qcow2" } }, + ], + "properties": { + "completion-mode": "grouped" + } + } + } + ``` + +* QMP example response, highlighting a failure for drive2: + * Acknowledgement that the Transaction was accepted and jobs were launched: + ```json + { "return": {} } + ``` + + * Later, QEMU sends notice that the second job has errored out, + but that the first job was also cancelled: + ```json + { "timestamp": { "seconds": 1447193702, "microseconds": 632377 }, + "data": { "device": "drive1", "action": "report", + "operation": "read" }, + "event": "BLOCK_JOB_ERROR" } + ``` + + ```json + { "timestamp": { "seconds": 1447193702, "microseconds": 640074 }, + "data": { "speed": 0, "offset": 0, "len": 67108864, + "error": "Input/output error", + "device": "drive1", "type": "backup" }, + "event": "BLOCK_JOB_COMPLETED" } + ``` + + ```json + { "timestamp": { "seconds": 1447193702, "microseconds": 640163 }, + "data": { "device": "drive0", "type": "backup", "speed": 0, + "len": 67108864, "offset": 16777216 }, + "event": "BLOCK_JOB_CANCELLED" } + ``` + <!-- The FreeBSD Documentation License diff --git a/docs/migration.txt b/docs/migration.txt index f6df4beb2a..fda8d61d69 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -291,3 +291,194 @@ save/send this state when we are in the middle of a pio operation (that is what ide_drive_pio_state_needed() checks). If DRQ_STAT is not enabled, the values on that fields are garbage and don't need to be sent. + += Return path = + +In most migration scenarios there is only a single data path that runs +from the source VM to the destination, typically along a single fd (although +possibly with another fd or similar for some fast way of throwing pages across). + +However, some uses need two way communication; in particular the Postcopy +destination needs to be able to request pages on demand from the source. + +For these scenarios there is a 'return path' from the destination to the source; +qemu_file_get_return_path(QEMUFile* fwdpath) gives the QEMUFile* for the return +path. + + Source side + Forward path - written by migration thread + Return path - opened by main thread, read by return-path thread + + Destination side + Forward path - read by main thread + Return path - opened by main thread, written by main thread AND postcopy + thread (protected by rp_mutex) + += Postcopy = +'Postcopy' migration is a way to deal with migrations that refuse to converge +(or take too long to converge) its plus side is that there is an upper bound on +the amount of migration traffic and time it takes, the down side is that during +the postcopy phase, a failure of *either* side or the network connection causes +the guest to be lost. + +In postcopy the destination CPUs are started before all the memory has been +transferred, and accesses to pages that are yet to be transferred cause +a fault that's translated by QEMU into a request to the source QEMU. + +Postcopy can be combined with precopy (i.e. normal migration) so that if precopy +doesn't finish in a given time the switch is made to postcopy. + +=== Enabling postcopy === + +To enable postcopy, issue this command on the monitor prior to the +start of migration: + +migrate_set_capability x-postcopy-ram on + +The normal commands are then used to start a migration, which is still +started in precopy mode. Issuing: + +migrate_start_postcopy + +will now cause the transition from precopy to postcopy. +It can be issued immediately after migration is started or any +time later on. Issuing it after the end of a migration is harmless. + +Note: During the postcopy phase, the bandwidth limits set using +migrate_set_speed is ignored (to avoid delaying requested pages that +the destination is waiting for). + +=== Postcopy device transfer === + +Loading of device data may cause the device emulation to access guest RAM +that may trigger faults that have to be resolved by the source, as such +the migration stream has to be able to respond with page data *during* the +device load, and hence the device data has to be read from the stream completely +before the device load begins to free the stream up. This is achieved by +'packaging' the device data into a blob that's read in one go. + +Source behaviour + +Until postcopy is entered the migration stream is identical to normal +precopy, except for the addition of a 'postcopy advise' command at +the beginning, to tell the destination that postcopy might happen. +When postcopy starts the source sends the page discard data and then +forms the 'package' containing: + + Command: 'postcopy listen' + The device state + A series of sections, identical to the precopy streams device state stream + containing everything except postcopiable devices (i.e. RAM) + Command: 'postcopy run' + +The 'package' is sent as the data part of a Command: 'CMD_PACKAGED', and the +contents are formatted in the same way as the main migration stream. + +During postcopy the source scans the list of dirty pages and sends them +to the destination without being requested (in much the same way as precopy), +however when a page request is received from the destination, the dirty page +scanning restarts from the requested location. This causes requested pages +to be sent quickly, and also causes pages directly after the requested page +to be sent quickly in the hope that those pages are likely to be used +by the destination soon. + +Destination behaviour + +Initially the destination looks the same as precopy, with a single thread +reading the migration stream; the 'postcopy advise' and 'discard' commands +are processed to change the way RAM is managed, but don't affect the stream +processing. + +------------------------------------------------------------------------------ + 1 2 3 4 5 6 7 +main -----DISCARD-CMD_PACKAGED ( LISTEN DEVICE DEVICE DEVICE RUN ) +thread | | + | (page request) + | \___ + v \ +listen thread: --- page -- page -- page -- page -- page -- + + a b c +------------------------------------------------------------------------------ + +On receipt of CMD_PACKAGED (1) + All the data associated with the package - the ( ... ) section in the +diagram - is read into memory (into a QEMUSizedBuffer), and the main thread +recurses into qemu_loadvm_state_main to process the contents of the package (2) +which contains commands (3,6) and devices (4...) + +On receipt of 'postcopy listen' - 3 -(i.e. the 1st command in the package) +a new thread (a) is started that takes over servicing the migration stream, +while the main thread carries on loading the package. It loads normal +background page data (b) but if during a device load a fault happens (5) the +returned page (c) is loaded by the listen thread allowing the main threads +device load to carry on. + +The last thing in the CMD_PACKAGED is a 'RUN' command (6) letting the destination +CPUs start running. +At the end of the CMD_PACKAGED (7) the main thread returns to normal running behaviour +and is no longer used by migration, while the listen thread carries +on servicing page data until the end of migration. + +=== Postcopy states === + +Postcopy moves through a series of states (see postcopy_state) from +ADVISE->DISCARD->LISTEN->RUNNING->END + + Advise: Set at the start of migration if postcopy is enabled, even + if it hasn't had the start command; here the destination + checks that its OS has the support needed for postcopy, and performs + setup to ensure the RAM mappings are suitable for later postcopy. + The destination will fail early in migration at this point if the + required OS support is not present. + (Triggered by reception of POSTCOPY_ADVISE command) + + Discard: Entered on receipt of the first 'discard' command; prior to + the first Discard being performed, hugepages are switched off + (using madvise) to ensure that no new huge pages are created + during the postcopy phase, and to cause any huge pages that + have discards on them to be broken. + + Listen: The first command in the package, POSTCOPY_LISTEN, switches + the destination state to Listen, and starts a new thread + (the 'listen thread') which takes over the job of receiving + pages off the migration stream, while the main thread carries + on processing the blob. With this thread able to process page + reception, the destination now 'sensitises' the RAM to detect + any access to missing pages (on Linux using the 'userfault' + system). + + Running: POSTCOPY_RUN causes the destination to synchronise all + state and start the CPUs and IO devices running. The main + thread now finishes processing the migration package and + now carries on as it would for normal precopy migration + (although it can't do the cleanup it would do as it + finishes a normal migration). + + End: The listen thread can now quit, and perform the cleanup of migration + state, the migration is now complete. + +=== Source side page maps === + +The source side keeps two bitmaps during postcopy; 'the migration bitmap' +and 'unsent map'. The 'migration bitmap' is basically the same as in +the precopy case, and holds a bit to indicate that page is 'dirty' - +i.e. needs sending. During the precopy phase this is updated as the CPU +dirties pages, however during postcopy the CPUs are stopped and nothing +should dirty anything any more. + +The 'unsent map' is used for the transition to postcopy. It is a bitmap that +has a bit cleared whenever a page is sent to the destination, however during +the transition to postcopy mode it is combined with the migration bitmap +to form a set of pages that: + a) Have been sent but then redirtied (which must be discarded) + b) Have not yet been sent - which also must be discarded to cause any + transparent huge pages built during precopy to be broken. + +Note that the contents of the unsentmap are sacrificed during the calculation +of the discard set and thus aren't valid once in postcopy. The dirtymap +is still valid and is used to ensure that no page is sent more than once. Any +request for a page that has already been sent is ignored. Duplicate requests +such as this can happen as a page is sent at about the same time the +destination accesses it. + diff --git a/docs/qapi-code-gen.txt b/docs/qapi-code-gen.txt index 2afab20f55..ceb9a782d0 100644 --- a/docs/qapi-code-gen.txt +++ b/docs/qapi-code-gen.txt @@ -106,12 +106,15 @@ Types, commands, and events share a common namespace. Therefore, generally speaking, type definitions should always use CamelCase for user-defined type names, while built-in types are lowercase. Type definitions should not end in 'Kind', as this namespace is used for -creating implicit C enums for visiting union types. Command names, +creating implicit C enums for visiting union types, or in 'List', as +this namespace is used for creating array types. Command names, and field names within a type, should be all lower case with words separated by a hyphen. However, some existing older commands and complex types use underscore; when extending such expressions, consistency is preferred over blindly avoiding underscore. Event -names should be ALL_CAPS with words separated by underscore. +names should be ALL_CAPS with words separated by underscore. Field +names cannot start with 'has-' or 'has_', as this is reserved for +tracking optional fields. Any name (command, event, type, field, or enum value) beginning with "x-" is marked experimental, and may be withdrawn or changed @@ -122,9 +125,10 @@ vendor), even if the rest of the name uses dash (example: __com.redhat_drive-mirror). Other than downstream extensions (with leading underscore and the use of dots), all names should begin with a letter, and contain only ASCII letters, digits, dash, and underscore. -It is okay to reuse names that match C keywords; the generator will -rename a field named "default" in the QAPI to "q_default" in the -generated C code. +Names beginning with 'q_' are reserved for the generator: QMP names +that resemble C keywords or other problematic strings will be munged +in C to use this prefix. For example, a field named "default" in +qapi becomes "q_default" in the generated C code. In the rest of this document, usage lines are given for each expression type, with literal strings written in lower case and @@ -510,8 +514,23 @@ exactly the server (QEMU) supports. For this purpose, QMP provides introspection via command query-qmp-schema. QGA currently doesn't support introspection. +While Client JSON Protocol wire compatibility should be maintained +between qemu versions, we cannot make the same guarantees for +introspection stability. For example, one version of qemu may provide +a non-variant optional member of a struct, and a later version rework +the member to instead be non-optional and associated with a variant. +Likewise, one version of qemu may list a member with open-ended type +'str', and a later version could convert it to a finite set of strings +via an enum type; or a member may be converted from a specific type to +an alternate that represents a choice between the original type and +something else. + query-qmp-schema returns a JSON array of SchemaInfo objects. These objects together describe the wire ABI, as defined in the QAPI schema. +There is no specified order to the SchemaInfo objects returned; a +client must search for a particular name throughout the entire array +to learn more about that name, but is at least guaranteed that there +will be no collisions between type, command, and event names. However, the SchemaInfo can't reflect all the rules and restrictions that apply to QMP. It's interface introspection (figuring out what's @@ -592,7 +611,9 @@ any. Each element is a JSON object with members "name" (the member's name), "type" (the name of its type), and optionally "default". The member is optional if "default" is present. Currently, "default" can only have value null. Other values are reserved for future -extensions. +extensions. The "members" array is in no particular order; clients +must search the entire object when learning whether a particular +member is supported. Example: the SchemaInfo for MyType from section Struct types @@ -606,7 +627,9 @@ Example: the SchemaInfo for MyType from section Struct types "variants" is a JSON array describing the object's variant members. Each element is a JSON object with members "case" (the value of type tag this element applies to) and "type" (the name of an object type -that provides the variant members for this type tag value). +that provides the variant members for this type tag value). The +"variants" array is in no particular order, and is not guaranteed to +list cases in the same order as the corresponding "tag" enum type. Example: the SchemaInfo for flat union BlockdevOptions from section Union types @@ -647,7 +670,8 @@ Union types The SchemaInfo for an alternate type has meta-type "alternate", and variant member "members". "members" is a JSON array. Each element is a JSON object with member "type", which names a type. Values of the -alternate type conform to exactly one of its member types. +alternate type conform to exactly one of its member types. There is +no guarantee on the order in which "members" will be listed. Example: the SchemaInfo for BlockRef from section Alternate types @@ -658,15 +682,20 @@ Example: the SchemaInfo for BlockRef from section Alternate types The SchemaInfo for an array type has meta-type "array", and variant member "element-type", which names the array's element type. Array -types are implicitly defined. +types are implicitly defined. For convenience, the array's name may +resemble the element type; however, clients should examine member +"element-type" instead of making assumptions based on parsing member +"name". Example: the SchemaInfo for ['str'] - { "name": "strList", "meta-type": "array", + { "name": "[str]", "meta-type": "array", "element-type": "str" } The SchemaInfo for an enumeration type has meta-type "enum" and -variant member "values". +variant member "values". The values are listed in no particular +order; clients must search the entire enum when learning whether a +particular value is supported. Example: the SchemaInfo for MyEnum from section Enumeration types diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt index d92cc4833b..d2f1ce497e 100644 --- a/docs/qmp-events.txt +++ b/docs/qmp-events.txt @@ -28,6 +28,8 @@ Example: "data": { "actual": 944766976 }, "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } +Note: this event is rate-limited. + BLOCK_IMAGE_CORRUPTED --------------------- @@ -296,6 +298,8 @@ Example: "data": { "reference": "usr1", "sector-num": 345435, "sectors-count": 5 }, "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } +Note: this event is rate-limited. + QUORUM_REPORT_BAD ----------------- @@ -318,6 +322,8 @@ Example: "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 }, "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } +Note: this event is rate-limited. + RESET ----- @@ -358,6 +364,8 @@ Example: "data": { "offset": 78 }, "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } +Note: this event is rate-limited. + SHUTDOWN -------- @@ -632,6 +640,8 @@ Example: "data": { "id": "channel0", "open": true }, "timestamp": { "seconds": 1401385907, "microseconds": 422329 } } +Note: this event is rate-limited separately for each "id". + WAKEUP ------ @@ -662,3 +672,5 @@ Example: Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is followed respectively by the RESET, SHUTDOWN, or STOP events. + +Note: this event is rate-limited. diff --git a/docs/qmp-spec.txt b/docs/qmp-spec.txt index 4c28cd9438..4fb10a5d6b 100644 --- a/docs/qmp-spec.txt +++ b/docs/qmp-spec.txt @@ -175,6 +175,11 @@ The format of asynchronous events is: For a listing of supported asynchronous events, please, refer to the qmp-events.txt file. +Some events are rate-limited to at most one per second. If additional +"similar" events arrive within one second, all but the last one are +dropped, and the last one is delayed. "Similar" normally means same +event type. See qmp-events.txt for details. + 2.5 QGA Synchronization ----------------------- diff --git a/docs/replay.txt b/docs/replay.txt new file mode 100644 index 0000000000..149727e2a6 --- /dev/null +++ b/docs/replay.txt @@ -0,0 +1,168 @@ +Copyright (c) 2010-2015 Institute for System Programming + of the Russian Academy of Sciences. + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. + +Record/replay +------------- + +Record/replay functions are used for the reverse execution and deterministic +replay of qemu execution. This implementation of deterministic replay can +be used for deterministic debugging of guest code through a gdb remote +interface. + +Execution recording writes a non-deterministic events log, which can be later +used for replaying the execution anywhere and for unlimited number of times. +It also supports checkpointing for faster rewinding during reverse debugging. +Execution replaying reads the log and replays all non-deterministic events +including external input, hardware clocks, and interrupts. + +Deterministic replay has the following features: + * Deterministically replays whole system execution and all contents of + the memory, state of the hardware devices, clocks, and screen of the VM. + * Writes execution log into the file for later replaying for multiple times + on different machines. + * Supports i386, x86_64, and ARM hardware platforms. + * Performs deterministic replay of all operations with keyboard and mouse + input devices. + +Usage of the record/replay: + * First, record the execution, by adding the following arguments to the command line: + '-icount shift=7,rr=record,rrfile=replay.bin -net none'. + Block devices' images are not actually changed in the recording mode, + because all of the changes are written to the temporary overlay file. + * Then you can replay it by using another command + line option: '-icount shift=7,rr=replay,rrfile=replay.bin -net none' + * '-net none' option should also be specified if network replay patches + are not applied. + +Papers with description of deterministic replay implementation: +http://www.computer.org/csdl/proceedings/csmr/2012/4666/00/4666a553-abs.html +http://dl.acm.org/citation.cfm?id=2786805.2803179 + +Modifications of qemu include: + * wrappers for clock and time functions to save their return values in the log + * saving different asynchronous events (e.g. system shutdown) into the log + * synchronization of the bottom halves execution + * synchronization of the threads from thread pool + * recording/replaying user input (mouse and keyboard) + * adding internal checkpoints for cpu and io synchronization + +Non-deterministic events +------------------------ + +Our record/replay system is based on saving and replaying non-deterministic +events (e.g. keyboard input) and simulating deterministic ones (e.g. reading +from HDD or memory of the VM). Saving only non-deterministic events makes +log file smaller, simulation faster, and allows using reverse debugging even +for realtime applications. + +The following non-deterministic data from peripheral devices is saved into +the log: mouse and keyboard input, network packets, audio controller input, +USB packets, serial port input, and hardware clocks (they are non-deterministic +too, because their values are taken from the host machine). Inputs from +simulated hardware, memory of VM, software interrupts, and execution of +instructions are not saved into the log, because they are deterministic and +can be replayed by simulating the behavior of virtual machine starting from +initial state. + +We had to solve three tasks to implement deterministic replay: recording +non-deterministic events, replaying non-deterministic events, and checking +that there is no divergence between record and replay modes. + +We changed several parts of QEMU to make event log recording and replaying. +Devices' models that have non-deterministic input from external devices were +changed to write every external event into the execution log immediately. +E.g. network packets are written into the log when they arrive into the virtual +network adapter. + +All non-deterministic events are coming from these devices. But to +replay them we need to know at which moments they occur. We specify +these moments by counting the number of instructions executed between +every pair of consecutive events. + +Instruction counting +-------------------- + +QEMU should work in icount mode to use record/replay feature. icount was +designed to allow deterministic execution in absence of external inputs +of the virtual machine. We also use icount to control the occurrence of the +non-deterministic events. The number of instructions elapsed from the last event +is written to the log while recording the execution. In replay mode we +can predict when to inject that event using the instruction counter. + +Timers +------ + +Timers are used to execute callbacks from different subsystems of QEMU +at the specified moments of time. There are several kinds of timers: + * Real time clock. Based on host time and used only for callbacks that + do not change the virtual machine state. For this reason real time + clock and timers does not affect deterministic replay at all. + * Virtual clock. These timers run only during the emulation. In icount + mode virtual clock value is calculated using executed instructions counter. + That is why it is completely deterministic and does not have to be recorded. + * Host clock. This clock is used by device models that simulate real time + sources (e.g. real time clock chip). Host clock is the one of the sources + of non-determinism. Host clock read operations should be logged to + make the execution deterministic. + * Real time clock for icount. This clock is similar to real time clock but + it is used only for increasing virtual clock while virtual machine is + sleeping. Due to its nature it is also non-deterministic as the host clock + and has to be logged too. + +Checkpoints +----------- + +Replaying of the execution of virtual machine is bound by sources of +non-determinism. These are inputs from clock and peripheral devices, +and QEMU thread scheduling. Thread scheduling affect on processing events +from timers, asynchronous input-output, and bottom halves. + +Invocations of timers are coupled with clock reads and changing the state +of the virtual machine. Reads produce non-deterministic data taken from +host clock. And VM state changes should preserve their order. Their relative +order in replay mode must replicate the order of callbacks in record mode. +To preserve this order we use checkpoints. When a specific clock is processed +in record mode we save to the log special "checkpoint" event. +Checkpoints here do not refer to virtual machine snapshots. They are just +record/replay events used for synchronization. + +QEMU in replay mode will try to invoke timers processing in random moment +of time. That's why we do not process a group of timers until the checkpoint +event will be read from the log. Such an event allows synchronizing CPU +execution and timer events. + +Another checkpoints application in record/replay is instruction counting +while the virtual machine is idle. This function (qemu_clock_warp) is called +from the wait loop. It changes virtual machine state and must be deterministic +then. That is why we added checkpoint to this function to prevent its +operation in replay mode when it does not correspond to record mode. + +Bottom halves +------------- + +Disk I/O events are completely deterministic in our model, because +in both record and replay modes we start virtual machine from the same +disk state. But callbacks that virtual disk controller uses for reading and +writing the disk may occur at different moments of time in record and replay +modes. + +Reading and writing requests are created by CPU thread of QEMU. Later these +requests proceed to block layer which creates "bottom halves". Bottom +halves consist of callback and its parameters. They are processed when +main loop locks the global mutex. These locks are not synchronized with +replaying process because main loop also processes the events that do not +affect the virtual machine state (like user interaction with monitor). + +That is why we had to implement saving and replaying bottom halves callbacks +synchronously to the CPU execution. When the callback is about to execute +it is added to the queue in the replay module. This queue is written to the +log when its callbacks are executed. In replay mode callbacks are not processed +until the corresponding event is read from the events log file. + +Sometimes the block layer uses asynchronous callbacks for its internal purposes +(like reading or writing VM snapshots or disk image cluster tables). In this +case bottom halves are not marked as "replayable" and do not saved +into the log. diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt index e0d71e27e6..0312d40af0 100644 --- a/docs/specs/vhost-user.txt +++ b/docs/specs/vhost-user.txt @@ -87,6 +87,14 @@ Depending on the request type, payload can be: User address: a 64-bit user address mmap offset: 64-bit offset where region starts in the mapped memory +* Log description + --------------------------- + | log size | log offset | + --------------------------- + log size: size of area used for logging + log offset: offset from start of supplied file descriptor + where logging starts (i.e. where guest address 0 would be logged) + In QEMU the vhost-user message is implemented with the following struct: typedef struct VhostUserMsg { @@ -98,6 +106,7 @@ typedef struct VhostUserMsg { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserLog log; }; } QEMU_PACKED VhostUserMsg; @@ -137,6 +146,39 @@ As older slaves don't support negotiating protocol features, a feature bit was dedicated for this purpose: #define VHOST_USER_F_PROTOCOL_FEATURES 30 +Starting and stopping rings +---------------------- +Client must only process each ring when it is started. + +Client must only pass data between the ring and the +backend, when the ring is enabled. + +If ring is started but disabled, client must process the +ring without talking to the backend. + +For example, for a networking device, in the disabled state +client must not supply any new RX packets, but must process +and discard any TX packets. + +If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is initialized +in an enabled state. + +If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, the ring is initialized +in a disabled state. Client must not pass data to/from the backend until ring is enabled by +VHOST_USER_SET_VRING_ENABLE with parameter 1, or after it has been disabled by +VHOST_USER_SET_VRING_ENABLE with parameter 0. + +Each ring is initialized in a stopped state, client must not process it until +ring is started, or after it has been stopped. + +Client must start ring upon receiving a kick (that is, detecting that file +descriptor is readable) on the descriptor specified by +VHOST_USER_SET_VRING_KICK, and stop ring upon receiving +VHOST_USER_GET_VRING_BASE. + +While processing the rings (whether they are enabled or not), client must +support changing some configuration aspects on the fly. + Multiple queue support ---------------------- @@ -161,9 +203,13 @@ the slave makes to the memory mapped regions. The client should mark the dirty pages in a log. Once it complies to this logging, it may declare the VHOST_F_LOG_ALL vhost feature. +To start/stop logging of data/used ring writes, server may send messages +VHOST_USER_SET_FEATURES with VHOST_F_LOG_ALL and VHOST_USER_SET_VRING_ADDR with +VHOST_VRING_F_LOG in ring's flags set to 1/0, respectively. + All the modifications to memory pointed by vring "descriptor" should be marked. Modifications to "used" vring should be marked if -VHOST_VRING_F_LOG is part of ring's features. +VHOST_VRING_F_LOG is part of ring's flags. Dirty pages are of size: #define VHOST_LOG_PAGE 0x1000 @@ -172,22 +218,35 @@ The log memory fd is provided in the ancillary data of VHOST_USER_SET_LOG_BASE message when the slave has VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature. -The size of the log may be computed by using all the known guest -addresses. The log covers from address 0 to the maximum of guest +The size of the log is supplied as part of VhostUserMsg +which should be large enough to cover all known guest +addresses. Log starts at the supplied offset in the +supplied file descriptor. +The log covers from address 0 to the maximum of guest regions. In pseudo-code, to mark page at "addr" as dirty: page = addr / VHOST_LOG_PAGE log[page / 8] |= 1 << page % 8 +Where addr is the guest physical address. + Use atomic operations, as the log may be concurrently manipulated. +Note that when logging modifications to the used ring (when VHOST_VRING_F_LOG +is set for this ring), log_guest_addr should be used to calculate the log +offset: the write to first byte of the used ring is logged at this offset from +log start. Also note that this value might be outside the legal guest physical +address range (i.e. does not have to be covered by the VhostUserMemory table), +but the bit offset of the last byte of the ring must fall within +the size supplied by VhostUserLog. + VHOST_USER_SET_LOG_FD is an optional message with an eventfd in ancillary data, it may be used to inform the master that the log has been modified. -Once the source has finished migration, VHOST_USER_RESET_OWNER message -will be sent by the source. No further update must be done before the -destination takes over with new regions & rings. +Once the source has finished migration, rings will be stopped by +the source. No further update must be done before rings are +restarted. Protocol features ----------------- @@ -255,14 +314,16 @@ Message types as an owner of the session. This can be used on the Slave as a "session start" flag. - * VHOST_USER_RESET_DEVICE + * VHOST_USER_RESET_OWNER Id: 4 - Equivalent ioctl: VHOST_RESET_DEVICE Master payload: N/A - Issued when a new connection is about to be closed. The Master will no - longer own this connection (and will usually close it). + This is no longer used. Used to be sent to request disabling + all rings, but some clients interpreted it to also discard + connection state (this interpretation would lead to bugs). + It is recommended that clients either ignore this message, + or use it to disable all rings. * VHOST_USER_SET_MEM_TABLE @@ -282,7 +343,12 @@ Message types Master payload: u64 Slave payload: N/A - Sets the logging base address. + Sets logging shared memory space. + When slave has VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol + feature, the log memory fd is provided in the ancillary data of + VHOST_USER_SET_LOG_BASE message, the size and offset of shared + memory area provided in the message. + * VHOST_USER_SET_LOG_FD @@ -382,6 +448,8 @@ Message types Master payload: vring state description Signal slave to enable or disable corresponding vring. + This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES + has been negotiated. * VHOST_USER_SEND_RARP diff --git a/docs/writing-qmp-commands.txt b/docs/writing-qmp-commands.txt index 8647cac4e7..59aa77ae25 100644 --- a/docs/writing-qmp-commands.txt +++ b/docs/writing-qmp-commands.txt @@ -210,7 +210,7 @@ if you don't see these strings, then something went wrong. === Errors === QMP commands should use the error interface exported by the error.h header -file. Basically, errors are set by calling the error_set() function. +file. Basically, most errors are set by calling the error_setg() function. Let's say we don't accept the string "message" to contain the word "love". If it does contain it, we want the "hello-world" command to return an error: @@ -219,8 +219,7 @@ void qmp_hello_world(bool has_message, const char *message, Error **errp) { if (has_message) { if (strstr(message, "love")) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, - "the word 'love' is not allowed"); + error_setg(errp, "the word 'love' is not allowed"); return; } printf("%s\n", message); @@ -229,10 +228,8 @@ void qmp_hello_world(bool has_message, const char *message, Error **errp) } } -The first argument to the error_set() function is the Error pointer to pointer, -which is passed to all QMP functions. The second argument is a ErrorClass -value, which should be ERROR_CLASS_GENERIC_ERROR most of the time (more -details about error classes are given below). The third argument is a human +The first argument to the error_setg() function is the Error pointer +to pointer, which is passed to all QMP functions. The next argument is a human description of the error, this is a free-form printf-like string. Let's test the example above. Build qemu, run it as defined in the "Testing" @@ -249,8 +246,9 @@ The QMP server's response should be: } } -As a general rule, all QMP errors should use ERROR_CLASS_GENERIC_ERROR. There -are two exceptions to this rule: +As a general rule, all QMP errors should use ERROR_CLASS_GENERIC_ERROR +(done by default when using error_setg()). There are two exceptions to +this rule: 1. A non-generic ErrorClass value exists* for the failure you want to report (eg. DeviceNotFound) @@ -259,8 +257,8 @@ are two exceptions to this rule: want to report, hence you have to add a new ErrorClass value so that they can check for it -If the failure you want to report doesn't fall in one of the two cases above, -just report ERROR_CLASS_GENERIC_ERROR. +If the failure you want to report falls into one of the two cases above, +use error_set() with a second argument of an ErrorClass value. * All existing ErrorClass values are defined in the qapi-schema.json file @@ -50,6 +50,7 @@ #include "qemu/rcu_queue.h" #include "qemu/main-loop.h" #include "translate-all.h" +#include "sysemu/replay.h" #include "exec/memory-internal.h" #include "exec/ram_addr.h" @@ -882,6 +883,7 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) } va_end(ap2); va_end(ap); + replay_finish(); #if defined(CONFIG_USER_ONLY) { struct sigaction act; @@ -901,7 +903,7 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr) block = atomic_rcu_read(&ram_list.mru_block); if (block && addr - block->offset < block->max_length) { - goto found; + return block; } QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr - block->offset < block->max_length) { @@ -1062,9 +1064,11 @@ static uint16_t phys_section_add(PhysPageMap *map, static void phys_section_destroy(MemoryRegion *mr) { + bool have_sub_page = mr->subpage; + memory_region_unref(mr); - if (mr->subpage) { + if (have_sub_page) { subpage_t *subpage = container_of(mr, subpage_t, iomem); object_unref(OBJECT(&subpage->iomem)); g_free(subpage); @@ -1194,9 +1198,6 @@ static long gethugepagesize(const char *path, Error **errp) return 0; } - if (fs.f_type != HUGETLBFS_MAGIC) - fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); - return fs.f_bsize; } @@ -1205,6 +1206,7 @@ static void *file_ram_alloc(RAMBlock *block, const char *path, Error **errp) { + struct stat st; char *filename; char *sanitized_name; char *c; @@ -1233,26 +1235,33 @@ static void *file_ram_alloc(RAMBlock *block, goto error; } - /* Make name safe to use with mkstemp by replacing '/' with '_'. */ - sanitized_name = g_strdup(memory_region_name(block->mr)); - for (c = sanitized_name; *c != '\0'; c++) { - if (*c == '/') - *c = '_'; - } + if (!stat(path, &st) && S_ISDIR(st.st_mode)) { + /* Make name safe to use with mkstemp by replacing '/' with '_'. */ + sanitized_name = g_strdup(memory_region_name(block->mr)); + for (c = sanitized_name; *c != '\0'; c++) { + if (*c == '/') { + *c = '_'; + } + } - filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path, - sanitized_name); - g_free(sanitized_name); + filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path, + sanitized_name); + g_free(sanitized_name); + + fd = mkstemp(filename); + if (fd >= 0) { + unlink(filename); + } + g_free(filename); + } else { + fd = open(path, O_RDWR | O_CREAT, 0644); + } - fd = mkstemp(filename); if (fd < 0) { error_setg_errno(errp, errno, "unable to create backing store for hugepages"); - g_free(filename); goto error; } - unlink(filename); - g_free(filename); memory = ROUND_UP(memory, hpagesize); @@ -1282,10 +1291,6 @@ static void *file_ram_alloc(RAMBlock *block, return area; error: - if (mem_prealloc) { - error_report("%s", error_get_pretty(*errp)); - exit(1); - } return NULL; } #endif @@ -1371,6 +1376,11 @@ static RAMBlock *find_ram_block(ram_addr_t addr) return NULL; } +const char *qemu_ram_get_idstr(RAMBlock *rb) +{ + return rb->idstr; +} + /* Called with iothread lock held. */ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) { @@ -1441,7 +1451,7 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp) assert(block); - newsize = TARGET_PAGE_ALIGN(newsize); + newsize = HOST_PAGE_ALIGN(newsize); if (block->used_length == newsize) { return 0; @@ -1585,7 +1595,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return -1; } - size = TARGET_PAGE_ALIGN(size); + size = HOST_PAGE_ALIGN(size); new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; new_block->used_length = size; @@ -1621,8 +1631,8 @@ ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, ram_addr_t addr; Error *local_err = NULL; - size = TARGET_PAGE_ALIGN(size); - max_size = TARGET_PAGE_ALIGN(max_size); + size = HOST_PAGE_ALIGN(size); + max_size = HOST_PAGE_ALIGN(max_size); new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; new_block->resized = resized; @@ -1871,8 +1881,16 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) } } -/* Some of the softmmu routines need to translate from a host pointer - * (typically a TLB entry) back to a ram offset. +/* + * Translates a host ptr back to a RAMBlock, a ram_addr and an offset + * in that RAMBlock. + * + * ptr: Host pointer to look up + * round_offset: If true round the result offset down to a page boundary + * *ram_addr: set to result ram_addr + * *offset: set to result offset within the RAMBlock + * + * Returns: RAMBlock (or NULL if not found) * * By the time this function returns, the returned pointer is not protected * by RCU anymore. If the caller is not within an RCU critical section and @@ -1880,18 +1898,22 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) * pointer, such as a reference to the region that includes the incoming * ram_addr_t. */ -MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) +RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, + ram_addr_t *ram_addr, + ram_addr_t *offset) { RAMBlock *block; uint8_t *host = ptr; - MemoryRegion *mr; if (xen_enabled()) { rcu_read_lock(); *ram_addr = xen_ram_addr_from_mapcache(ptr); - mr = qemu_get_ram_block(*ram_addr)->mr; + block = qemu_get_ram_block(*ram_addr); + if (block) { + *offset = (host - block->host); + } rcu_read_unlock(); - return mr; + return block; } rcu_read_lock(); @@ -1914,10 +1936,49 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) return NULL; found: - *ram_addr = block->offset + (host - block->host); - mr = block->mr; + *offset = (host - block->host); + if (round_offset) { + *offset &= TARGET_PAGE_MASK; + } + *ram_addr = block->offset + *offset; rcu_read_unlock(); - return mr; + return block; +} + +/* + * Finds the named RAMBlock + * + * name: The name of RAMBlock to find + * + * Returns: RAMBlock (or NULL if not found) + */ +RAMBlock *qemu_ram_block_by_name(const char *name) +{ + RAMBlock *block; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + if (!strcmp(name, block->idstr)) { + return block; + } + } + + return NULL; +} + +/* Some of the softmmu routines need to translate from a host pointer + (typically a TLB entry) back to a ram offset. */ +MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) +{ + RAMBlock *block; + ram_addr_t offset; /* Not used */ + + block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset); + + if (!block) { + return NULL; + } + + return block->mr; } static void notdirty_mem_write(void *opaque, hwaddr ram_addr, @@ -2698,8 +2759,8 @@ void cpu_register_map_client(QEMUBH *bh) void cpu_exec_init_all(void) { qemu_mutex_init(&ram_list.mutex); - memory_map_init(); io_mem_init(); + memory_map_init(); qemu_mutex_init(&map_client_list_lock); } @@ -3496,6 +3557,16 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, } return 0; } + +/* + * Allows code that needs to deal with migration bitmaps etc to still be built + * target independent. + */ +size_t qemu_target_page_bits(void) +{ + return TARGET_PAGE_BITS; +} + #endif /* diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index 9097d15c98..ad1da0d6f5 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -1128,10 +1128,19 @@ int main(int argc, char **argv) } } + if (chdir("/") < 0) { + do_perror("chdir"); + goto error; + } + if (chroot(rpath) < 0) { + do_perror("chroot"); + goto error; + } + get_version = false; #ifdef FS_IOC_GETVERSION /* check whether underlying FS support IOC_GETVERSION */ - retval = statfs(rpath, &st_fs); + retval = statfs("/", &st_fs); if (!retval) { switch (st_fs.f_type) { case EXT2_SUPER_MAGIC: @@ -1144,16 +1153,7 @@ int main(int argc, char **argv) } #endif - if (chdir("/") < 0) { - do_perror("chdir"); - goto error; - } - if (chroot(rpath) < 0) { - do_perror("chroot"); - goto error; - } umask(0); - if (init_capabilities() < 0) { goto error; } @@ -956,6 +956,13 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf) if (*p == ',') p++; len = strtoull(p, NULL, 16); + + /* memtohex() doubles the required space */ + if (len > MAX_PACKET_LENGTH / 2) { + put_packet (s, "E22"); + break; + } + if (target_memory_rw_debug(s->g_cpu, addr, mem_buf, len, false) != 0) { put_packet (s, "E14"); } else { @@ -970,6 +977,12 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf) len = strtoull(p, (char **)&p, 16); if (*p == ':') p++; + + /* hextomem() reads 2*len bytes */ + if (len > strlen(p) / 2) { + put_packet (s, "E22"); + break; + } hextomem(mem_buf, p, len); if (target_memory_rw_debug(s->g_cpu, addr, mem_buf, len, true) != 0) { @@ -1107,7 +1120,8 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf) cpu = find_cpu(thread); if (cpu != NULL) { cpu_synchronize_state(cpu); - len = snprintf((char *)mem_buf, sizeof(mem_buf), + /* memtohex() doubles the required space */ + len = snprintf((char *)mem_buf, sizeof(buf) / 2, "CPU#%d [%s]", cpu->cpu_index, cpu->halted ? "halted " : "running"); memtohex(buf, mem_buf, len); @@ -1136,8 +1150,8 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf) put_packet(s, "E01"); break; } - hextomem(mem_buf, p + 5, len); len = len / 2; + hextomem(mem_buf, p + 5, len); mem_buf[len++] = 0; qemu_chr_be_write(s->mon_chr, mem_buf, len); put_packet(s, "OK"); diff --git a/hmp-commands.hx b/hmp-commands.hx index 3a4ae3950a..bb52e4d3bd 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -194,8 +194,8 @@ ETEXI { .name = "change", - .args_type = "device:B,target:F,arg:s?", - .params = "device filename [format]", + .args_type = "device:B,target:F,arg:s?,read-only-mode:s?", + .params = "device filename [format [read-only-mode]]", .help = "change a removable medium, optional format", .mhandler.cmd = hmp_change, }, @@ -206,7 +206,7 @@ STEXI Change the configuration of a device. @table @option -@item change @var{diskdevice} @var{filename} [@var{format}] +@item change @var{diskdevice} @var{filename} [@var{format} [@var{read-only-mode}]] Change the medium for a removable disk device to point to @var{filename}. eg @example @@ -215,6 +215,20 @@ Change the medium for a removable disk device to point to @var{filename}. eg @var{format} is optional. +@var{read-only-mode} may be used to change the read-only status of the device. +It accepts the following values: + +@table @var +@item retain +Retains the current status; this is the default. + +@item read-only +Makes the device read-only. + +@item read-write +Makes the device writable. +@end table + @item change vnc @var{display},@var{options} Change the configuration of the VNC server. The valid syntax for @var{display} and @var{options} are described at @ref{sec_invocation}. eg @@ -1008,6 +1022,23 @@ Set the parameter @var{parameter} for migration. ETEXI { + .name = "migrate_start_postcopy", + .args_type = "", + .params = "", + .help = "Followup to a migration command to switch the migration" + " to postcopy mode. The x-postcopy-ram capability must " + "be set before the original migration command.", + .mhandler.cmd = hmp_migrate_start_postcopy, + }, + +STEXI +@item migrate_start_postcopy +@findex migrate_start_postcopy +Switch in-progress migration to postcopy mode. Ignored after the end of +migration (or once already in postcopy). +ETEXI + + { .name = "client_migrate_info", .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", .params = "protocol hostname port tls-port cert-subject", @@ -27,6 +27,7 @@ #include "qapi/opts-visitor.h" #include "qapi/qmp/qerror.h" #include "qapi/string-output-visitor.h" +#include "qapi/util.h" #include "qapi-visit.h" #include "ui/console.h" #include "block/qapi.h" @@ -521,6 +522,7 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict) " flush_total_time_ns=%" PRId64 " rd_merged=%" PRId64 " wr_merged=%" PRId64 + " idle_time_ns=%" PRId64 "\n", stats->value->stats->rd_bytes, stats->value->stats->wr_bytes, @@ -531,7 +533,8 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict) stats->value->stats->rd_total_time_ns, stats->value->stats->flush_total_time_ns, stats->value->stats->rd_merged, - stats->value->stats->wr_merged); + stats->value->stats->wr_merged, + stats->value->stats->idle_time_ns); } qapi_free_BlockStatsList(stats_list); @@ -569,8 +572,8 @@ void hmp_info_vnc(Monitor *mon, const QDict *qdict) for (client = info->clients; client; client = client->next) { monitor_printf(mon, "Client:\n"); monitor_printf(mon, " address: %s:%s\n", - client->value->base->host, - client->value->base->service); + client->value->host, + client->value->service); monitor_printf(mon, " x509_dname: %s\n", client->value->x509_dname ? client->value->x509_dname : "none"); @@ -638,7 +641,7 @@ void hmp_info_spice(Monitor *mon, const QDict *qdict) for (chan = info->channels; chan; chan = chan->next) { monitor_printf(mon, "Channel:\n"); monitor_printf(mon, " address: %s:%s%s\n", - chan->value->base->host, chan->value->base->port, + chan->value->host, chan->value->port, chan->value->tls ? " [tls]" : ""); monitor_printf(mon, " session: %" PRId64 "\n", chan->value->connection_id); @@ -841,11 +844,11 @@ void hmp_info_tpm(Monitor *mon, const QDict *qdict) c, TpmModel_lookup[ti->model]); monitor_printf(mon, " \\ %s: type=%s", - ti->id, TpmTypeOptionsKind_lookup[ti->options->kind]); + ti->id, TpmTypeOptionsKind_lookup[ti->options->type]); - switch (ti->options->kind) { + switch (ti->options->type) { case TPM_TYPE_OPTIONS_KIND_PASSTHROUGH: - tpo = ti->options->passthrough; + tpo = ti->options->u.passthrough; monitor_printf(mon, "%s%s%s%s", tpo->has_path ? ",path=" : "", tpo->has_path ? tpo->path : "", @@ -1293,6 +1296,13 @@ void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &err); } +void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + qmp_migrate_start_postcopy(&err); + hmp_handle_error(mon, &err); +} + void hmp_set_password(Monitor *mon, const QDict *qdict) { const char *protocol = qdict_get_str(qdict, "protocol"); @@ -1336,24 +1346,46 @@ void hmp_change(Monitor *mon, const QDict *qdict) const char *device = qdict_get_str(qdict, "device"); const char *target = qdict_get_str(qdict, "target"); const char *arg = qdict_get_try_str(qdict, "arg"); + const char *read_only = qdict_get_try_str(qdict, "read-only-mode"); + BlockdevChangeReadOnlyMode read_only_mode = 0; Error *err = NULL; - if (strcmp(device, "vnc") == 0 && - (strcmp(target, "passwd") == 0 || - strcmp(target, "password") == 0)) { - if (!arg) { - monitor_read_password(mon, hmp_change_read_arg, NULL); + if (strcmp(device, "vnc") == 0) { + if (read_only) { + monitor_printf(mon, + "Parameter 'read-only-mode' is invalid for VNC\n"); return; } - } + if (strcmp(target, "passwd") == 0 || + strcmp(target, "password") == 0) { + if (!arg) { + monitor_read_password(mon, hmp_change_read_arg, NULL); + return; + } + } + qmp_change("vnc", target, !!arg, arg, &err); + } else { + if (read_only) { + read_only_mode = + qapi_enum_parse(BlockdevChangeReadOnlyMode_lookup, + read_only, BLOCKDEV_CHANGE_READ_ONLY_MODE_MAX, + BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN, &err); + if (err) { + hmp_handle_error(mon, &err); + return; + } + } - qmp_change(device, target, !!arg, arg, &err); - if (err && - error_get_class(err) == ERROR_CLASS_DEVICE_ENCRYPTED) { - error_free(err); - monitor_read_block_device_key(mon, device, NULL, NULL); - return; + qmp_blockdev_change_medium(device, target, !!arg, arg, + !!read_only, read_only_mode, &err); + if (err && + error_get_class(err) == ERROR_CLASS_DEVICE_ENCRYPTED) { + error_free(err); + monitor_read_block_device_key(mon, device, NULL, NULL); + return; + } } + hmp_handle_error(mon, &err); } @@ -1735,15 +1767,15 @@ void hmp_sendkey(Monitor *mon, const QDict *qdict) if (*endp != '\0') { goto err_out; } - keylist->value->kind = KEY_VALUE_KIND_NUMBER; - keylist->value->number = value; + keylist->value->type = KEY_VALUE_KIND_NUMBER; + keylist->value->u.number = value; } else { int idx = index_from_key(keyname_buf); if (idx == Q_KEY_CODE_MAX) { goto err_out; } - keylist->value->kind = KEY_VALUE_KIND_QCODE; - keylist->value->qcode = idx; + keylist->value->type = KEY_VALUE_KIND_QCODE; + keylist->value->u.qcode = idx; } if (!separator) { @@ -1958,12 +1990,12 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) value = info->value; if (value) { - switch (value->kind) { + switch (value->type) { case MEMORY_DEVICE_INFO_KIND_DIMM: - di = value->dimm; + di = value->u.dimm; monitor_printf(mon, "Memory device [%s]: \"%s\"\n", - MemoryDeviceInfoKind_lookup[value->kind], + MemoryDeviceInfoKind_lookup[value->type], di->id ? di->id : ""); monitor_printf(mon, " addr: 0x%" PRIx64 "\n", di->addr); monitor_printf(mon, " slot: %" PRId64 "\n", di->slot); @@ -69,6 +69,7 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict); void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); +void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); void hmp_set_password(Monitor *mon, const QDict *qdict); void hmp_expire_password(Monitor *mon, const QDict *qdict); void hmp_eject(Monitor *mon, const QDict *qdict); diff --git a/hw/9pfs/virtio-9p-coth.c b/hw/9pfs/virtio-9p-coth.c index 5057f8d220..fb6e8f80e0 100644 --- a/hw/9pfs/virtio-9p-coth.c +++ b/hw/9pfs/virtio-9p-coth.c @@ -12,71 +12,30 @@ * */ -#include "fsdev/qemu-fsdev.h" -#include "qemu/thread.h" -#include "qemu/event_notifier.h" +#include "qemu-common.h" +#include "block/thread-pool.h" #include "qemu/coroutine.h" +#include "qemu/main-loop.h" #include "virtio-9p-coth.h" -/* v9fs glib thread pool */ -static V9fsThPool v9fs_pool; - -void co_run_in_worker_bh(void *opaque) +/* Called from QEMU I/O thread. */ +static void coroutine_enter_cb(void *opaque, int ret) { Coroutine *co = opaque; - g_thread_pool_push(v9fs_pool.pool, co, NULL); -} - -static void v9fs_qemu_process_req_done(EventNotifier *e) -{ - Coroutine *co; - - event_notifier_test_and_clear(e); - - while ((co = g_async_queue_try_pop(v9fs_pool.completed)) != NULL) { - qemu_coroutine_enter(co, NULL); - } + qemu_coroutine_enter(co, NULL); } -static void v9fs_thread_routine(gpointer data, gpointer user_data) +/* Called from worker thread. */ +static int coroutine_enter_func(void *arg) { - Coroutine *co = data; - + Coroutine *co = arg; qemu_coroutine_enter(co, NULL); - - g_async_queue_push(v9fs_pool.completed, co); - - event_notifier_set(&v9fs_pool.e); + return 0; } -int v9fs_init_worker_threads(void) +void co_run_in_worker_bh(void *opaque) { - int ret = 0; - V9fsThPool *p = &v9fs_pool; - sigset_t set, oldset; - - sigfillset(&set); - /* Leave signal handling to the iothread. */ - pthread_sigmask(SIG_SETMASK, &set, &oldset); - - p->pool = g_thread_pool_new(v9fs_thread_routine, p, -1, FALSE, NULL); - if (!p->pool) { - ret = -1; - goto err_out; - } - p->completed = g_async_queue_new(); - if (!p->completed) { - /* - * We are going to terminate. - * So don't worry about cleanup - */ - ret = -1; - goto err_out; - } - event_notifier_init(&p->e, 0); - - event_notifier_set_handler(&p->e, v9fs_qemu_process_req_done); -err_out: - pthread_sigmask(SIG_SETMASK, &oldset, NULL); - return ret; + Coroutine *co = opaque; + thread_pool_submit_aio(qemu_get_aio_context()->thread_pool, + coroutine_enter_func, co, coroutine_enter_cb, co); } diff --git a/hw/9pfs/virtio-9p-coth.h b/hw/9pfs/virtio-9p-coth.h index 0fbe49a946..4ac1aaf902 100644 --- a/hw/9pfs/virtio-9p-coth.h +++ b/hw/9pfs/virtio-9p-coth.h @@ -18,14 +18,6 @@ #include "qemu/thread.h" #include "qemu/coroutine.h" #include "virtio-9p.h" -#include <glib.h> - -typedef struct V9fsThPool { - EventNotifier e; - - GThreadPool *pool; - GAsyncQueue *completed; -} V9fsThPool; /* * we want to use bottom half because we want to make sure the below @@ -45,7 +37,7 @@ typedef struct V9fsThPool { qemu_bh_schedule(co_bh); \ /* \ * yield in qemu thread and re-enter back \ - * in glib worker thread \ + * in worker thread \ */ \ qemu_coroutine_yield(); \ qemu_bh_delete(co_bh); \ diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 93a407c459..944b5f5e9f 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -43,6 +43,16 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config) g_free(cfg); } +static void virtio_9p_save(QEMUFile *f, void *opaque) +{ + virtio_save(VIRTIO_DEVICE(opaque), f); +} + +static int virtio_9p_load(QEMUFile *f, void *opaque, int version_id) +{ + return virtio_load(VIRTIO_DEVICE(opaque), f, version_id); +} + static void virtio_9p_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -106,10 +116,6 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp) " and export path:%s", s->fsconf.fsdev_id, s->ctx.fs_root); goto out; } - if (v9fs_init_worker_threads() < 0) { - error_setg(errp, "worker thread initialization failed"); - goto out; - } /* * Check details of export path, We need to use fs driver @@ -130,6 +136,7 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp) } v9fs_path_free(&path); + register_savevm(dev, "virtio-9p", -1, 1, virtio_9p_save, virtio_9p_load, s); return; out: g_free(s->ctx.fs_root); diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 0d4b3247b7..a00a0abe83 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1163,9 +1163,7 @@ void *acpi_data_push(GArray *table_data, unsigned size) unsigned acpi_data_len(GArray *table) { -#if GLIB_CHECK_VERSION(2, 22, 0) assert(g_array_get_element_size(table) == 1); -#endif return table->len; } diff --git a/hw/acpi/core.c b/hw/acpi/core.c index fe6215af4a..21e113d713 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -625,8 +625,12 @@ void acpi_pm1_cnt_reset(ACPIREGS *ar) void acpi_gpe_init(ACPIREGS *ar, uint8_t len) { ar->gpe.len = len; - ar->gpe.sts = g_malloc0(len / 2); - ar->gpe.en = g_malloc0(len / 2); + /* Only first len / 2 bytes are ever used, + * but the caller in ich9.c migrates full len bytes. + * TODO: fix ich9.c and drop the extra allocation. + */ + ar->gpe.sts = g_malloc0(len); + ar->gpe.en = g_malloc0(len); } void acpi_gpe_reset(ACPIREGS *ar) diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index 2ff0d5ce1b..e4b9a01f80 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -155,6 +155,7 @@ static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, qapi_event_send_mem_unplug_error(dev->id, error_get_pretty(local_err), &error_abort); + error_free(local_err); break; } trace_mhp_acpi_pc_dimm_deleted(mem_st->selector); @@ -238,10 +239,12 @@ void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st, mdev->dimm = dev; mdev->is_enabled = true; - mdev->is_inserting = true; + if (dev->hotplugged) { + mdev->is_inserting = true; - /* do ACPI magic */ - acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS); + /* do ACPI magic */ + acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS); + } return; } diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c index 43dc0a12de..b0ca81cea3 100644 --- a/hw/arm/allwinner-a10.c +++ b/hw/arm/allwinner-a10.c @@ -39,6 +39,9 @@ static void aw_a10_init(Object *obj) qemu_check_nic_model(&nd_table[0], TYPE_AW_EMAC); qdev_set_nic_properties(DEVICE(&s->emac), &nd_table[0]); } + + object_initialize(&s->sata, sizeof(s->sata), TYPE_ALLWINNER_AHCI); + qdev_set_parent_bus(DEVICE(&s->sata), sysbus_get_default()); } static void aw_a10_realize(DeviceState *dev, Error **errp) @@ -93,6 +96,14 @@ static void aw_a10_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(sysbusdev, 0, AW_A10_EMAC_BASE); sysbus_connect_irq(sysbusdev, 0, s->irq[55]); + object_property_set_bool(OBJECT(&s->sata), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->sata), 0, AW_A10_SATA_BASE); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->sata), 0, s->irq[56]); + /* FIXME use a qdev chardev prop instead of serial_hds[] */ serial_mm_init(get_system_memory(), AW_A10_UART0_REG_BASE, 2, s->irq[1], 115200, serial_hds[0], DEVICE_NATIVE_ENDIAN); diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index eb214db443..a80d2ad29c 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -166,17 +166,15 @@ static void armv7m_reset(void *opaque) mem_size is in bytes. Returns the NVIC array. */ -qemu_irq *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, +DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model) { ARMCPU *cpu; CPUARMState *env; DeviceState *nvic; - qemu_irq *pic = g_new(qemu_irq, num_irq); int image_size; uint64_t entry; uint64_t lowaddr; - int i; int big_endian; MemoryRegion *hack = g_new(MemoryRegion, 1); @@ -198,9 +196,6 @@ qemu_irq *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, qdev_init_nofail(nvic); sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0, qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ)); - for (i = 0; i < num_irq; i++) { - pic[i] = qdev_get_gpio_in(nvic, i); - } #ifdef TARGET_WORDS_BIGENDIAN big_endian = 1; @@ -234,7 +229,7 @@ qemu_irq *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, memory_region_add_subregion(system_memory, 0xfffff000, hack); qemu_register_reset(armv7m_reset, cpu); - return pic; + return nvic; } static Property bitband_properties[] = { diff --git a/hw/arm/boot.c b/hw/arm/boot.c index bef451b3b2..75f69bfe01 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -11,6 +11,7 @@ #include "hw/hw.h" #include "hw/arm/arm.h" #include "hw/arm/linux-boot-if.h" +#include "sysemu/kvm.h" #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" @@ -28,14 +29,15 @@ #define KERNEL64_LOAD_ADDR 0x00080000 typedef enum { - FIXUP_NONE = 0, /* do nothing */ - FIXUP_TERMINATOR, /* end of insns */ - FIXUP_BOARDID, /* overwrite with board ID number */ - FIXUP_ARGPTR, /* overwrite with pointer to kernel args */ - FIXUP_ENTRYPOINT, /* overwrite with kernel entry point */ - FIXUP_GIC_CPU_IF, /* overwrite with GIC CPU interface address */ - FIXUP_BOOTREG, /* overwrite with boot register address */ - FIXUP_DSB, /* overwrite with correct DSB insn for cpu */ + FIXUP_NONE = 0, /* do nothing */ + FIXUP_TERMINATOR, /* end of insns */ + FIXUP_BOARDID, /* overwrite with board ID number */ + FIXUP_BOARD_SETUP, /* overwrite with board specific setup code address */ + FIXUP_ARGPTR, /* overwrite with pointer to kernel args */ + FIXUP_ENTRYPOINT, /* overwrite with kernel entry point */ + FIXUP_GIC_CPU_IF, /* overwrite with GIC CPU interface address */ + FIXUP_BOOTREG, /* overwrite with boot register address */ + FIXUP_DSB, /* overwrite with correct DSB insn for cpu */ FIXUP_MAX, } FixupType; @@ -58,8 +60,17 @@ static const ARMInsnFixup bootloader_aarch64[] = { { 0, FIXUP_TERMINATOR } }; -/* The worlds second smallest bootloader. Set r0-r2, then jump to kernel. */ +/* A very small bootloader: call the board-setup code (if needed), + * set r0-r2, then jump to the kernel. + * If we're not calling boot setup code then we don't copy across + * the first BOOTLOADER_NO_BOARD_SETUP_OFFSET insns in this array. + */ + static const ARMInsnFixup bootloader[] = { + { 0xe28fe008 }, /* add lr, pc, #8 */ + { 0xe51ff004 }, /* ldr pc, [pc, #-4] */ + { 0, FIXUP_BOARD_SETUP }, +#define BOOTLOADER_NO_BOARD_SETUP_OFFSET 3 { 0xe3a00000 }, /* mov r0, #0 */ { 0xe59f1004 }, /* ldr r1, [pc, #4] */ { 0xe59f2004 }, /* ldr r2, [pc, #4] */ @@ -131,6 +142,7 @@ static void write_bootloader(const char *name, hwaddr addr, case FIXUP_NONE: break; case FIXUP_BOARDID: + case FIXUP_BOARD_SETUP: case FIXUP_ARGPTR: case FIXUP_ENTRYPOINT: case FIXUP_GIC_CPU_IF: @@ -484,7 +496,8 @@ static void do_cpu_reset(void *opaque) } /* Set to non-secure if not a secure boot */ - if (!info->secure_boot) { + if (!info->secure_boot && + (cs != first_cpu || !info->secure_board_setup)) { /* Linux expects non-secure state */ env->cp15.scr_el3 |= SCR_NS; } @@ -587,6 +600,12 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) struct arm_boot_info *info = container_of(n, struct arm_boot_info, load_kernel_notifier); + /* The board code is not supposed to set secure_board_setup unless + * running its code in secure mode is actually possible, and KVM + * doesn't support secure. + */ + assert(!(info->secure_board_setup && kvm_enabled())); + /* Load the kernel. */ if (!info->kernel_filename || info->firmware_loaded) { @@ -640,6 +659,9 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) elf_machine = EM_AARCH64; } else { primary_loader = bootloader; + if (!info->write_board_setup) { + primary_loader += BOOTLOADER_NO_BOARD_SETUP_OFFSET; + } kernel_load_offset = KERNEL_LOAD_ADDR; elf_machine = EM_ARM; } @@ -745,6 +767,7 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) info->initrd_size = initrd_size; fixupcontext[FIXUP_BOARDID] = info->board_id; + fixupcontext[FIXUP_BOARD_SETUP] = info->board_setup_addr; /* for device tree boot, we pass the DTB directly in r2. Otherwise * we point to the kernel args. @@ -793,6 +816,9 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) if (info->nb_cpus > 1) { info->write_secondary_boot(cpu, info); } + if (info->write_board_setup) { + info->write_board_setup(cpu, info); + } /* Notify devices which need to fake up firmware initialization * that we're doing a direct kernel boot. diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c index be04b27230..85ae69efd9 100644 --- a/hw/arm/highbank.c +++ b/hw/arm/highbank.c @@ -22,6 +22,7 @@ #include "hw/devices.h" #include "hw/loader.h" #include "net/net.h" +#include "sysemu/kvm.h" #include "sysemu/sysemu.h" #include "hw/boards.h" #include "sysemu/block-backend.h" @@ -32,10 +33,52 @@ #define SMP_BOOT_REG 0x40 #define MPCORE_PERIPHBASE 0xfff10000 +#define MVBAR_ADDR 0x200 + #define NIRQ_GIC 160 /* Board init. */ +/* MVBAR_ADDR is limited by precision of movw */ + +QEMU_BUILD_BUG_ON(MVBAR_ADDR >= (1 << 16)); + +#define ARMV7_IMM16(x) (extract32((x), 0, 12) | \ + extract32((x), 12, 4) << 16) + +static void hb_write_board_setup(ARMCPU *cpu, + const struct arm_boot_info *info) +{ + int n; + uint32_t board_setup_blob[] = { + /* MVBAR_ADDR */ + /* Default unimplemented and unused vectors to spin. Makes it + * easier to debug (as opposed to the CPU running away). + */ + 0xeafffffe, /* notused1: b notused */ + 0xeafffffe, /* notused2: b notused */ + 0xe1b0f00e, /* smc: movs pc, lr - exception return */ + 0xeafffffe, /* prefetch_abort: b prefetch_abort */ + 0xeafffffe, /* data_abort: b data_abort */ + 0xeafffffe, /* notused3: b notused3 */ + 0xeafffffe, /* irq: b irq */ + 0xeafffffe, /* fiq: b fiq */ +#define BOARD_SETUP_ADDR (MVBAR_ADDR + 8 * sizeof(uint32_t)) + 0xe3000000 + ARMV7_IMM16(MVBAR_ADDR), /* movw r0, MVBAR_ADDR */ + 0xee0c0f30, /* mcr p15, 0, r0, c12, c0, 1 - set MVBAR */ + 0xee110f11, /* mrc p15, 0, r0, c1 , c1, 0 - get SCR */ + 0xe3810001, /* orr r0, #1 - set NS */ + 0xee010f11, /* mcr p15, 0, r0, c1 , c1, 0 - set SCR */ + 0xe1600070, /* smc - go to monitor mode to flush NS change */ + 0xe12fff1e, /* bx lr - return to caller */ + }; + for (n = 0; n < ARRAY_SIZE(board_setup_blob); n++) { + board_setup_blob[n] = tswap32(board_setup_blob[n]); + } + rom_add_blob_fixed("board-setup", board_setup_blob, + sizeof(board_setup_blob), MVBAR_ADDR); +} + static void hb_write_secondary(ARMCPU *cpu, const struct arm_boot_info *info) { int n; @@ -223,15 +266,13 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id) MemoryRegion *sysmem; char *sysboot_filename; - if (!cpu_model) { - switch (machine_id) { - case CALXEDA_HIGHBANK: - cpu_model = "cortex-a9"; - break; - case CALXEDA_MIDWAY: - cpu_model = "cortex-a15"; - break; - } + switch (machine_id) { + case CALXEDA_HIGHBANK: + cpu_model = "cortex-a9"; + break; + case CALXEDA_MIDWAY: + cpu_model = "cortex-a15"; + break; } for (n = 0; n < smp_cpus; n++) { @@ -240,24 +281,16 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id) ARMCPU *cpu; Error *err = NULL; - if (!oc) { - error_report("Unable to find CPU definition"); - exit(1); - } - cpuobj = object_new(object_class_get_name(oc)); cpu = ARM_CPU(cpuobj); - /* By default A9 and A15 CPUs have EL3 enabled. This board does not - * currently support EL3 so the CPU EL3 property is disabled before - * realization. - */ - if (object_property_find(cpuobj, "has_el3", NULL)) { - object_property_set_bool(cpuobj, false, "has_el3", &err); - if (err) { - error_report_err(err); - exit(1); - } + object_property_set_int(cpuobj, QEMU_PSCI_CONDUIT_SMC, + "psci-conduit", &error_abort); + + if (n) { + /* Secondary CPUs start in PSCI powered-down state */ + object_property_set_bool(cpuobj, true, + "start-powered-off", &error_abort); } if (object_property_find(cpuobj, "reset-cbar", NULL)) { @@ -378,6 +411,16 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id) highbank_binfo.loader_start = 0; highbank_binfo.write_secondary_boot = hb_write_secondary; highbank_binfo.secondary_cpu_reset_hook = hb_reset_secondary; + if (!kvm_enabled()) { + highbank_binfo.board_setup_addr = BOARD_SETUP_ADDR; + highbank_binfo.write_board_setup = hb_write_board_setup; + highbank_binfo.secure_board_setup = true; + } else { + error_report("WARNING: cannot load built-in Monitor support " + "if KVM is enabled. Some guests (such as Linux) " + "may not boot."); + } + arm_load_kernel(ARM_CPU(first_cpu), &highbank_binfo); } diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index 6a6b3e6642..2a8835ec01 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -1275,7 +1275,7 @@ static int n8x0_atag_setup(void *p, int model) strcpy((void *) w, "hw-build"); /* char component[12] */ w += 6; strcpy((void *) w, "QEMU "); - pstrcat((void *) w, 12, qemu_get_version()); /* char version[12] */ + pstrcat((void *) w, 12, qemu_hw_version()); /* char version[12] */ w += 6; tag = (model == 810) ? "1.1.10-qemu" : "1.1.6-qemu"; diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index 3d6486fcf5..0114e0a7f8 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -16,6 +16,7 @@ #include "net/net.h" #include "hw/boards.h" #include "exec/address-spaces.h" +#include "sysemu/sysemu.h" #define GPIO_A 0 #define GPIO_B 1 @@ -1176,6 +1177,14 @@ static int stellaris_adc_init(SysBusDevice *sbd) return 0; } +static +void do_sys_reset(void *opaque, int n, int level) +{ + if (level) { + qemu_system_reset_request(); + } +} + /* Board init. */ static stellaris_board_info stellaris_boards[] = { { "LM3S811EVB", @@ -1210,8 +1219,7 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model, 0x40024000, 0x40025000, 0x40026000}; static const int gpio_irq[7] = {0, 1, 2, 3, 4, 30, 31}; - qemu_irq *pic; - DeviceState *gpio_dev[7]; + DeviceState *gpio_dev[7], *nvic; qemu_irq gpio_in[7][8]; qemu_irq gpio_out[7][8]; qemu_irq adc; @@ -1241,12 +1249,19 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model, vmstate_register_ram_global(sram); memory_region_add_subregion(system_memory, 0x20000000, sram); - pic = armv7m_init(system_memory, flash_size, NUM_IRQ_LINES, + nvic = armv7m_init(system_memory, flash_size, NUM_IRQ_LINES, kernel_filename, cpu_model); + qdev_connect_gpio_out_named(nvic, "SYSRESETREQ", 0, + qemu_allocate_irq(&do_sys_reset, NULL, 0)); + if (board->dc1 & (1 << 16)) { dev = sysbus_create_varargs(TYPE_STELLARIS_ADC, 0x40038000, - pic[14], pic[15], pic[16], pic[17], NULL); + qdev_get_gpio_in(nvic, 14), + qdev_get_gpio_in(nvic, 15), + qdev_get_gpio_in(nvic, 16), + qdev_get_gpio_in(nvic, 17), + NULL); adc = qdev_get_gpio_in(dev, 0); } else { adc = NULL; @@ -1255,19 +1270,21 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model, if (board->dc2 & (0x10000 << i)) { dev = sysbus_create_simple(TYPE_STELLARIS_GPTM, 0x40030000 + i * 0x1000, - pic[timer_irq[i]]); + qdev_get_gpio_in(nvic, timer_irq[i])); /* TODO: This is incorrect, but we get away with it because the ADC output is only ever pulsed. */ qdev_connect_gpio_out(dev, 0, adc); } } - stellaris_sys_init(0x400fe000, pic[28], board, nd_table[0].macaddr.a); + stellaris_sys_init(0x400fe000, qdev_get_gpio_in(nvic, 28), + board, nd_table[0].macaddr.a); for (i = 0; i < 7; i++) { if (board->dc4 & (1 << i)) { gpio_dev[i] = sysbus_create_simple("pl061_luminary", gpio_addr[i], - pic[gpio_irq[i]]); + qdev_get_gpio_in(nvic, + gpio_irq[i])); for (j = 0; j < 8; j++) { gpio_in[i][j] = qdev_get_gpio_in(gpio_dev[i], j); gpio_out[i][j] = NULL; @@ -1276,7 +1293,8 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model, } if (board->dc2 & (1 << 12)) { - dev = sysbus_create_simple(TYPE_STELLARIS_I2C, 0x40020000, pic[8]); + dev = sysbus_create_simple(TYPE_STELLARIS_I2C, 0x40020000, + qdev_get_gpio_in(nvic, 8)); i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c"); if (board->peripherals & BP_OLED_I2C) { i2c_create_slave(i2c, "ssd0303", 0x3d); @@ -1286,11 +1304,12 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model, for (i = 0; i < 4; i++) { if (board->dc2 & (1 << i)) { sysbus_create_simple("pl011_luminary", 0x4000c000 + i * 0x1000, - pic[uart_irq[i]]); + qdev_get_gpio_in(nvic, uart_irq[i])); } } if (board->dc2 & (1 << 4)) { - dev = sysbus_create_simple("pl022", 0x40008000, pic[7]); + dev = sysbus_create_simple("pl022", 0x40008000, + qdev_get_gpio_in(nvic, 7)); if (board->peripherals & BP_OLED_SSI) { void *bus; DeviceState *sddev; @@ -1326,7 +1345,7 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model, qdev_set_nic_properties(enet, &nd_table[0]); qdev_init_nofail(enet); sysbus_mmio_map(SYS_BUS_DEVICE(enet), 0, 0x40048000); - sysbus_connect_irq(SYS_BUS_DEVICE(enet), 0, pic[42]); + sysbus_connect_irq(SYS_BUS_DEVICE(enet), 0, qdev_get_gpio_in(nvic, 42)); } if (board->peripherals & BP_GAMEPAD) { qemu_irq gpad_irq[5]; diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index 4d26a7ed91..3f993406dd 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -59,9 +59,8 @@ static void stm32f205_soc_initfn(Object *obj) static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) { STM32F205State *s = STM32F205_SOC(dev_soc); - DeviceState *syscfgdev, *usartdev, *timerdev; + DeviceState *syscfgdev, *usartdev, *timerdev, *nvic; SysBusDevice *syscfgbusdev, *usartbusdev, *timerbusdev; - qemu_irq *pic; Error *err = NULL; int i; @@ -88,8 +87,8 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) vmstate_register_ram_global(sram); memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); - pic = armv7m_init(get_system_memory(), FLASH_SIZE, 96, - s->kernel_filename, s->cpu_model); + nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96, + s->kernel_filename, s->cpu_model); /* System configuration controller */ syscfgdev = DEVICE(&s->syscfg); @@ -100,7 +99,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } syscfgbusdev = SYS_BUS_DEVICE(syscfgdev); sysbus_mmio_map(syscfgbusdev, 0, 0x40013800); - sysbus_connect_irq(syscfgbusdev, 0, pic[71]); + sysbus_connect_irq(syscfgbusdev, 0, qdev_get_gpio_in(nvic, 71)); /* Attach UART (uses USART registers) and USART controllers */ for (i = 0; i < STM_NUM_USARTS; i++) { @@ -112,7 +111,8 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } usartbusdev = SYS_BUS_DEVICE(usartdev); sysbus_mmio_map(usartbusdev, 0, usart_addr[i]); - sysbus_connect_irq(usartbusdev, 0, pic[usart_irq[i]]); + sysbus_connect_irq(usartbusdev, 0, + qdev_get_gpio_in(nvic, usart_irq[i])); } /* Timer 2 to 5 */ @@ -126,7 +126,8 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } timerbusdev = SYS_BUS_DEVICE(timerdev); sysbus_mmio_map(timerbusdev, 0, timer_addr[i]); - sysbus_connect_irq(timerbusdev, 0, pic[timer_irq[i]]); + sysbus_connect_irq(timerbusdev, 0, + qdev_get_gpio_in(nvic, timer_irq[i])); } } diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 1aaff1f662..3c2c5d6bfa 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -180,6 +180,7 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, int irq, aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_string("PCI0"))); aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device"))); + aml_append(dev, aml_name_decl("_CCA", aml_int(1))); /* Declare the PCI Routing Table. */ Aml *rt_pkg = aml_package(nr_pcie_buses * PCI_NUM_PINS); @@ -448,6 +449,24 @@ build_madt(GArray *table_data, GArray *linker, VirtGuestInfo *guest_info, gicd->length = sizeof(*gicd); gicd->base_address = memmap[VIRT_GIC_DIST].base; + for (i = 0; i < guest_info->smp_cpus; i++) { + AcpiMadtGenericInterrupt *gicc = acpi_data_push(table_data, + sizeof *gicc); + ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); + + gicc->type = ACPI_APIC_GENERIC_INTERRUPT; + gicc->length = sizeof(*gicc); + if (guest_info->gic_version == 2) { + gicc->base_address = memmap[VIRT_GIC_CPU].base; + } + gicc->cpu_interface_number = i; + gicc->arm_mpidr = armcpu->mp_affinity; + gicc->uid = i; + if (test_bit(i, cpuinfo->found_cpus)) { + gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED); + } + } + if (guest_info->gic_version == 3) { AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data, sizeof *gicr); @@ -457,20 +476,6 @@ build_madt(GArray *table_data, GArray *linker, VirtGuestInfo *guest_info, gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base); gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size); } else { - for (i = 0; i < guest_info->smp_cpus; i++) { - AcpiMadtGenericInterrupt *gicc = acpi_data_push(table_data, - sizeof *gicc); - gicc->type = ACPI_APIC_GENERIC_INTERRUPT; - gicc->length = sizeof(*gicc); - gicc->base_address = memmap[VIRT_GIC_CPU].base; - gicc->cpu_interface_number = i; - gicc->arm_mpidr = i; - gicc->uid = i; - if (test_bit(i, cpuinfo->found_cpus)) { - gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED); - } - } - gic_msi = acpi_data_push(table_data, sizeof *gic_msi); gic_msi->type = ACPI_APIC_GENERIC_MSI_FRAME; gic_msi->length = sizeof(*gic_msi); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 77d9267599..9c6792cea1 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -941,8 +941,8 @@ static void machvirt_init(MachineState *machine) if (!gic_version) { gic_version = kvm_arm_vgic_probe(); if (!gic_version) { - error_report("Unable to determine GIC version supported by host\n" - "Probably KVM acceleration is not supported\n"); + error_report("Unable to determine GIC version supported by host"); + error_printf("KVM acceleration is probably not supported\n"); exit(1); } } @@ -990,7 +990,7 @@ static void machvirt_init(MachineState *machine) char *cpuopts = g_strdup(cpustr[1]); if (!oc) { - fprintf(stderr, "Unable to find CPU definition\n"); + error_report("Unable to find CPU definition"); exit(1); } cpuobj = object_new(object_class_get_name(oc)); @@ -1126,8 +1126,8 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) } else if (!strcmp(value, "host")) { vms->gic_version = 0; /* Will probe later */ } else { - error_report("Invalid gic-version option value\n" - "Allowed values are: 3, 2, host\n"); + error_report("Invalid gic-version option value"); + error_printf("Allowed gic-version values are: 3, 2, host\n"); exit(1); } } diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index 9f89483d6a..1c1a44547f 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -24,6 +24,7 @@ #include "hw/block/flash.h" #include "sysemu/block-backend.h" #include "hw/loader.h" +#include "hw/misc/zynq-xadc.h" #include "hw/ssi.h" #include "qemu/error-report.h" @@ -43,6 +44,45 @@ static const int dma_irqs[8] = { 46, 47, 48, 49, 72, 73, 74, 75 }; +#define BOARD_SETUP_ADDR 0x100 + +#define SLCR_LOCK_OFFSET 0x004 +#define SLCR_UNLOCK_OFFSET 0x008 +#define SLCR_ARM_PLL_OFFSET 0x100 + +#define SLCR_XILINX_UNLOCK_KEY 0xdf0d +#define SLCR_XILINX_LOCK_KEY 0x767b + +#define ARMV7_IMM16(x) (extract32((x), 0, 12) | \ + extract32((x), 12, 4) << 16) + +/* Write immediate val to address r0 + addr. r0 should contain base offset + * of the SLCR block. Clobbers r1. + */ + +#define SLCR_WRITE(addr, val) \ + 0xe3001000 + ARMV7_IMM16(extract32((val), 0, 16)), /* movw r1 ... */ \ + 0xe3401000 + ARMV7_IMM16(extract32((val), 16, 16)), /* movt r1 ... */ \ + 0xe5801000 + (addr) + +static void zynq_write_board_setup(ARMCPU *cpu, + const struct arm_boot_info *info) +{ + int n; + uint32_t board_setup_blob[] = { + 0xe3a004f8, /* mov r0, #0xf8000000 */ + SLCR_WRITE(SLCR_UNLOCK_OFFSET, SLCR_XILINX_UNLOCK_KEY), + SLCR_WRITE(SLCR_ARM_PLL_OFFSET, 0x00014008), + SLCR_WRITE(SLCR_LOCK_OFFSET, SLCR_XILINX_LOCK_KEY), + 0xe12fff1e, /* bx lr */ + }; + for (n = 0; n < ARRAY_SIZE(board_setup_blob); n++) { + board_setup_blob[n] = tswap32(board_setup_blob[n]); + } + rom_add_blob_fixed("board-setup", board_setup_blob, + sizeof(board_setup_blob), BOARD_SETUP_ADDR); +} + static struct arm_boot_info zynq_binfo = {}; static void gem_init(NICInfo *nd, uint32_t base, qemu_irq irq) @@ -225,6 +265,11 @@ static void zynq_init(MachineState *machine) sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xE0101000); sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[79-IRQ_OFFSET]); + dev = qdev_create(NULL, TYPE_ZYNQ_XADC); + qdev_init_nofail(dev); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xF8007100); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[39-IRQ_OFFSET]); + dev = qdev_create(NULL, "pl330"); qdev_prop_set_uint8(dev, "num_chnls", 8); qdev_prop_set_uint8(dev, "num_periph_req", 4); @@ -252,6 +297,9 @@ static void zynq_init(MachineState *machine) zynq_binfo.nb_cpus = 1; zynq_binfo.board_id = 0xd32; zynq_binfo.loader_start = 0; + zynq_binfo.board_setup_addr = BOARD_SETUP_ADDR; + zynq_binfo.write_board_setup = zynq_write_board_setup; + arm_load_kernel(ARM_CPU(first_cpu), &zynq_binfo); } diff --git a/hw/arm/xlnx-ep108.c b/hw/arm/xlnx-ep108.c index 2899698443..85b978fa76 100644 --- a/hw/arm/xlnx-ep108.c +++ b/hw/arm/xlnx-ep108.c @@ -51,7 +51,7 @@ static void xlnx_ep108_init(MachineState *machine) machine->ram_size = EP108_MAX_RAM_SIZE; } - if (machine->ram_size <= 0x08000000) { + if (machine->ram_size < 0x08000000) { qemu_log("WARNING: RAM size " RAM_ADDR_FMT " is small for EP108", machine->ram_size); } diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c index b36ca3da74..87553bbc60 100644 --- a/hw/arm/xlnx-zynqmp.c +++ b/hw/arm/xlnx-zynqmp.c @@ -48,6 +48,14 @@ static const int uart_intr[XLNX_ZYNQMP_NUM_UARTS] = { 21, 22, }; +static const uint64_t sdhci_addr[XLNX_ZYNQMP_NUM_SDHCI] = { + 0xFF160000, 0xFF170000, +}; + +static const int sdhci_intr[XLNX_ZYNQMP_NUM_SDHCI] = { + 48, 49, +}; + typedef struct XlnxZynqMPGICRegion { int region_index; uint32_t address; @@ -97,6 +105,13 @@ static void xlnx_zynqmp_init(Object *obj) object_initialize(&s->sata, sizeof(s->sata), TYPE_SYSBUS_AHCI); qdev_set_parent_bus(DEVICE(&s->sata), sysbus_get_default()); + + for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) { + object_initialize(&s->sdhci[i], sizeof(s->sdhci[i]), + TYPE_SYSBUS_SDHCI); + qdev_set_parent_bus(DEVICE(&s->sdhci[i]), + sysbus_get_default()); + } } static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) @@ -258,6 +273,19 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->sata), 0, SATA_ADDR); sysbus_connect_irq(SYS_BUS_DEVICE(&s->sata), 0, gic_spi[SATA_INTR]); + + for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) { + object_property_set_bool(OBJECT(&s->sdhci[i]), true, + "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->sdhci[i]), 0, + sdhci_addr[i]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci[i]), 0, + gic_spi[sdhci_intr[i]]); + } } static Property xlnx_zynqmp_props[] = { diff --git a/hw/block/nand.c b/hw/block/nand.c index 61d2cec032..f0e34139fe 100644 --- a/hw/block/nand.c +++ b/hw/block/nand.c @@ -522,8 +522,8 @@ void nand_setio(DeviceState *dev, uint32_t value) if (s->ale) { unsigned int shift = s->addrlen * 8; - unsigned int mask = ~(0xff << shift); - unsigned int v = value << shift; + uint64_t mask = ~(0xffull << shift); + uint64_t v = (uint64_t)value << shift; s->addr = (s->addr & mask) | v; s->addrlen ++; @@ -712,7 +712,7 @@ static void glue(nand_blk_erase_, PAGE_SIZE)(NANDFlashState *s) memset(s->storage + (PAGE(addr) << OOB_SHIFT), 0xff, OOB_SIZE << s->erase_shift); i = SECTOR(addr); - page = SECTOR(addr + (ADDR_SHIFT + s->erase_shift)); + page = SECTOR(addr + (1 << (ADDR_SHIFT + s->erase_shift))); for (; i < page; i ++) if (blk_write(s->blk, i, iobuf, 1) < 0) { printf("%s: write error in sector %" PRIu64 "\n", __func__, i); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 5da41b23cf..169e4fa7a5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -201,10 +201,11 @@ static void nvme_rw_cb(void *opaque, int ret) NvmeCtrl *n = sq->ctrl; NvmeCQueue *cq = n->cq[sq->cqid]; - block_acct_done(blk_get_stats(n->conf.blk), &req->acct); if (!ret) { + block_acct_done(blk_get_stats(n->conf.blk), &req->acct); req->status = NVME_SUCCESS; } else { + block_acct_failed(blk_get_stats(n->conf.blk), &req->acct); req->status = NVME_INTERNAL_DEV_ERROR; } if (req->has_sg) { @@ -238,18 +239,22 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, uint64_t data_size = (uint64_t)nlb << data_shift; uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; + enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; if ((slba + nlb) > ns->id_ns.nsze) { + block_acct_invalid(blk_get_stats(n->conf.blk), acct); return NVME_LBA_RANGE | NVME_DNR; } + if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) { + block_acct_invalid(blk_get_stats(n->conf.blk), acct); return NVME_INVALID_FIELD | NVME_DNR; } + assert((nlb << data_shift) == req->qsg.size); req->has_sg = true; - dma_acct_start(n->conf.blk, &req->acct, &req->qsg, - is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); + dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); req->aiocb = is_write ? dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) : dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req); diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 8beb26b4db..756ae5ce63 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -72,11 +72,14 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, VirtIOBlock *s = req->dev; if (action == BLOCK_ERROR_ACTION_STOP) { + /* Break the link as the next request is going to be parsed from the + * ring again. Otherwise we may end up doing a double completion! */ + req->mr_next = NULL; req->next = s->rq; s->rq = req; } else if (action == BLOCK_ERROR_ACTION_REPORT) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); - block_acct_done(blk_get_stats(s->blk), &req->acct); + block_acct_failed(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); } @@ -536,6 +539,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) if (!virtio_blk_sect_range_ok(req->dev, req->sector_num, req->qiov.size)) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); + block_acct_invalid(blk_get_stats(req->dev->blk), + is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); virtio_blk_free_request(req); return; } @@ -798,6 +803,11 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) static void virtio_blk_save(QEMUFile *f, void *opaque) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + VirtIOBlock *s = VIRTIO_BLK(vdev); + + if (s->dataplane) { + virtio_blk_data_plane_stop(s->dataplane); + } virtio_save(vdev, f); } @@ -839,10 +849,7 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, req->next = s->rq; s->rq = req; - virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr, - req->elem.in_num, 1); - virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr, - req->elem.out_num, 0); + virtqueue_map(&req->elem); } return 0; @@ -975,7 +982,7 @@ static Property virtio_blk_properties[] = { DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true), #ifdef __linux__ - DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true), + DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, false), #endif DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, true), diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 1bbc111939..814665034d 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -75,7 +75,6 @@ struct ioreq { off_t start; QEMUIOVector v; int presync; - int postsync; uint8_t mapped; /* grant mapping */ @@ -144,7 +143,6 @@ static void ioreq_reset(struct ioreq *ioreq) ioreq->status = 0; ioreq->start = 0; ioreq->presync = 0; - ioreq->postsync = 0; ioreq->mapped = 0; memset(ioreq->domids, 0, sizeof(ioreq->domids)); @@ -520,12 +518,6 @@ static void qemu_aio_complete(void *opaque, int ret) if (ioreq->aio_inflight > 0) { return; } - if (ioreq->postsync) { - ioreq->postsync = 0; - ioreq->aio_inflight++; - blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq); - return; - } ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; ioreq_unmap(ioreq); @@ -537,7 +529,11 @@ static void qemu_aio_complete(void *opaque, int ret) break; } case BLKIF_OP_READ: - block_acct_done(blk_get_stats(ioreq->blkdev->blk), &ioreq->acct); + if (ioreq->status == BLKIF_RSP_OKAY) { + block_acct_done(blk_get_stats(ioreq->blkdev->blk), &ioreq->acct); + } else { + block_acct_failed(blk_get_stats(ioreq->blkdev->blk), &ioreq->acct); + } break; case BLKIF_OP_DISCARD: default: @@ -576,7 +572,9 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) } block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct, - ioreq->v.size, BLOCK_ACCT_WRITE); + ioreq->v.size, + ioreq->req.operation == BLKIF_OP_WRITE ? + BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH); ioreq->aio_inflight++; blk_aio_writev(blkdev->blk, ioreq->start / BLOCK_SIZE, &ioreq->v, ioreq->v.size / BLOCK_SIZE, @@ -720,6 +718,23 @@ static void blk_handle_requests(struct XenBlkDev *blkdev) /* parse them */ if (ioreq_parse(ioreq) != 0) { + + switch (ioreq->req.operation) { + case BLKIF_OP_READ: + block_acct_invalid(blk_get_stats(blkdev->blk), + BLOCK_ACCT_READ); + break; + case BLKIF_OP_WRITE: + block_acct_invalid(blk_get_stats(blkdev->blk), + BLOCK_ACCT_WRITE); + break; + case BLKIF_OP_FLUSH_DISKCACHE: + block_acct_invalid(blk_get_stats(blkdev->blk), + BLOCK_ACCT_FLUSH); + default: + break; + }; + if (blk_send_response_one(ioreq)) { xen_be_send_notify(&blkdev->xendev); } diff --git a/hw/bt/hci.c b/hw/bt/hci.c index 6a88d492ac..2151d01281 100644 --- a/hw/bt/hci.c +++ b/hw/bt/hci.c @@ -23,6 +23,8 @@ #include "hw/usb.h" #include "sysemu/bt.h" #include "hw/bt.h" +#include "qapi/qmp/qerror.h" +#include "sysemu/replay.h" struct bt_hci_s { uint8_t *(*evt_packet)(void *opaque); @@ -72,6 +74,8 @@ struct bt_hci_s { struct HCIInfo info; struct bt_device_s device; + + Error *replay_blocker; }; #define DEFAULT_RSSI_DBM 20 @@ -2189,6 +2193,9 @@ struct HCIInfo *bt_new_hci(struct bt_scatternet_s *net) s->device.handle_destroy = bt_hci_destroy; + error_setg(&s->replay_blocker, QERR_REPLAY_NOT_SUPPORTED, "-bt hci"); + replay_add_blocker(s->replay_blocker); + return &s->info; } diff --git a/hw/bt/sdp.c b/hw/bt/sdp.c index c903747952..04eaecae67 100644 --- a/hw/bt/sdp.c +++ b/hw/bt/sdp.c @@ -42,7 +42,7 @@ struct bt_l2cap_sdp_state_s { static ssize_t sdp_datalen(const uint8_t **element, ssize_t *left) { - size_t len = *(*element) ++ & SDP_DSIZE_MASK; + uint32_t len = *(*element) ++ & SDP_DSIZE_MASK; if (!*left) return -1; @@ -150,12 +150,14 @@ static ssize_t sdp_svc_search(struct bt_l2cap_sdp_state_s *sdp, if (seqlen < 3 || len < seqlen) return -SDP_INVALID_SYNTAX; len -= seqlen; - while (seqlen) if (sdp_svc_match(sdp, &req, &seqlen)) return -SDP_INVALID_SYNTAX; - } else if (sdp_svc_match(sdp, &req, &seqlen)) - return -SDP_INVALID_SYNTAX; + } else { + if (sdp_svc_match(sdp, &req, &len)) { + return -SDP_INVALID_SYNTAX; + } + } if (len < 3) return -SDP_INVALID_SYNTAX; @@ -278,8 +280,11 @@ static ssize_t sdp_attr_get(struct bt_l2cap_sdp_state_s *sdp, while (seqlen) if (sdp_attr_match(record, &req, &seqlen)) return -SDP_INVALID_SYNTAX; - } else if (sdp_attr_match(record, &req, &seqlen)) - return -SDP_INVALID_SYNTAX; + } else { + if (sdp_attr_match(record, &req, &len)) { + return -SDP_INVALID_SYNTAX; + } + } if (len < 1) return -SDP_INVALID_SYNTAX; @@ -393,8 +398,11 @@ static ssize_t sdp_svc_search_attr_get(struct bt_l2cap_sdp_state_s *sdp, while (seqlen) if (sdp_svc_match(sdp, &req, &seqlen)) return -SDP_INVALID_SYNTAX; - } else if (sdp_svc_match(sdp, &req, &seqlen)) - return -SDP_INVALID_SYNTAX; + } else { + if (sdp_svc_match(sdp, &req, &len)) { + return -SDP_INVALID_SYNTAX; + } + } if (len < 3) return -SDP_INVALID_SYNTAX; @@ -413,8 +421,11 @@ static ssize_t sdp_svc_search_attr_get(struct bt_l2cap_sdp_state_s *sdp, while (seqlen) if (sdp_svc_attr_match(sdp, &req, &seqlen)) return -SDP_INVALID_SYNTAX; - } else if (sdp_svc_attr_match(sdp, &req, &seqlen)) - return -SDP_INVALID_SYNTAX; + } else { + if (sdp_svc_attr_match(sdp, &req, &len)) { + return -SDP_INVALID_SYNTAX; + } + } if (len < 1) return -SDP_INVALID_SYNTAX; diff --git a/hw/char/escc.c b/hw/char/escc.c index 9816154206..c9840e11da 100644 --- a/hw/char/escc.c +++ b/hw/char/escc.c @@ -842,13 +842,13 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src, ChannelState *s = (ChannelState *)dev; int qcode, keycode; - assert(evt->kind == INPUT_EVENT_KIND_KEY); - qcode = qemu_input_key_value_to_qcode(evt->key->key); + assert(evt->type == INPUT_EVENT_KIND_KEY); + qcode = qemu_input_key_value_to_qcode(evt->u.key->key); trace_escc_sunkbd_event_in(qcode, QKeyCode_lookup[qcode], - evt->key->down); + evt->u.key->down); if (qcode == Q_KEY_CODE_CAPS_LOCK) { - if (evt->key->down) { + if (evt->u.key->down) { s->caps_lock_mode ^= 1; if (s->caps_lock_mode == 2) { return; /* Drop second press */ @@ -862,7 +862,7 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src, } if (qcode == Q_KEY_CODE_NUM_LOCK) { - if (evt->key->down) { + if (evt->u.key->down) { s->num_lock_mode ^= 1; if (s->num_lock_mode == 2) { return; /* Drop second press */ @@ -876,7 +876,7 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src, } keycode = qcode_to_keycode[qcode]; - if (!evt->key->down) { + if (!evt->u.key->down) { keycode |= 0x80; } trace_escc_sunkbd_event_out(keycode); diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index be97058712..497b0afd9f 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -705,10 +705,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id, qemu_get_buffer(f, (unsigned char *)&port->elem, sizeof(port->elem)); - virtqueue_map_sg(port->elem.in_sg, port->elem.in_addr, - port->elem.in_num, 1); - virtqueue_map_sg(port->elem.out_sg, port->elem.out_addr, - port->elem.out_num, 1); + virtqueue_map(&port->elem); /* * Port was throttled on source machine. Let's diff --git a/hw/core/machine.c b/hw/core/machine.c index f4db340468..acca00db22 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -462,11 +462,6 @@ bool machine_usb(MachineState *machine) return machine->usb; } -bool machine_iommu(MachineState *machine) -{ - return machine->iommu; -} - bool machine_kernel_irqchip_allowed(MachineState *machine) { return machine->kernel_irqchip_allowed; diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c index 8437bd6e8b..edf077cfd0 100644 --- a/hw/core/ptimer.c +++ b/hw/core/ptimer.c @@ -9,6 +9,7 @@ #include "qemu/timer.h" #include "hw/ptimer.h" #include "qemu/host-utils.h" +#include "sysemu/replay.h" struct ptimer_state { @@ -27,7 +28,7 @@ struct ptimer_state static void ptimer_trigger(ptimer_state *s) { if (s->bh) { - qemu_bh_schedule(s->bh); + replay_bh_schedule_event(s->bh); } } diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 4ab04aa31e..b3ad467754 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -325,6 +325,11 @@ void qdev_reset_all(DeviceState *dev) qdev_walk_children(dev, NULL, NULL, qdev_reset_one, qbus_reset_one, NULL); } +void qdev_reset_all_fn(void *opaque) +{ + qdev_reset_all(DEVICE(opaque)); +} + void qbus_reset_all(BusState *bus) { qbus_walk_children(bus, NULL, NULL, qdev_reset_one, qbus_reset_one, NULL); diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c index 3e1d0b9c20..e2de281768 100644 --- a/hw/display/milkymist-tmu2.c +++ b/hw/display/milkymist-tmu2.c @@ -30,8 +30,8 @@ #include "qemu/error-report.h" #include <X11/Xlib.h> -#include <GL/gl.h> -#include <GL/glx.h> +#include <epoxy/gl.h> +#include <epoxy/glx.h> enum { R_CTL = 0, diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 9c961dae93..8a3040cbbf 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -2156,7 +2156,7 @@ static int qxl_post_load(void *opaque, int version) qxl_create_guest_primary(d, 1, QXL_SYNC); /* replay surface-create and cursor-set commands */ - cmds = g_malloc0(sizeof(QXLCommandExt) * (d->ssd.num_surfaces + 1)); + cmds = g_new0(QXLCommandExt, d->ssd.num_surfaces + 1); for (in = 0, out = 0; in < d->ssd.num_surfaces; in++) { if (d->guest_surfaces.cmds[in] == 0) { continue; diff --git a/hw/display/tcx.c b/hw/display/tcx.c index bf119bc89a..d720ea6666 100644 --- a/hw/display/tcx.c +++ b/hw/display/tcx.c @@ -944,57 +944,55 @@ static void tcx_initfn(Object *obj) SysBusDevice *sbd = SYS_BUS_DEVICE(obj); TCXState *s = TCX(obj); - memory_region_init_ram(&s->rom, OBJECT(s), "tcx.prom", FCODE_MAX_ROM_SIZE, + memory_region_init_ram(&s->rom, obj, "tcx.prom", FCODE_MAX_ROM_SIZE, &error_fatal); memory_region_set_readonly(&s->rom, true); sysbus_init_mmio(sbd, &s->rom); /* 2/STIP : Stippler */ - memory_region_init_io(&s->stip, OBJECT(s), &tcx_stip_ops, s, "tcx.stip", + memory_region_init_io(&s->stip, obj, &tcx_stip_ops, s, "tcx.stip", TCX_STIP_NREGS); sysbus_init_mmio(sbd, &s->stip); /* 3/BLIT : Blitter */ - memory_region_init_io(&s->blit, OBJECT(s), &tcx_blit_ops, s, "tcx.blit", + memory_region_init_io(&s->blit, obj, &tcx_blit_ops, s, "tcx.blit", TCX_BLIT_NREGS); sysbus_init_mmio(sbd, &s->blit); /* 5/RSTIP : Raw Stippler */ - memory_region_init_io(&s->rstip, OBJECT(s), &tcx_rstip_ops, s, "tcx.rstip", + memory_region_init_io(&s->rstip, obj, &tcx_rstip_ops, s, "tcx.rstip", TCX_RSTIP_NREGS); sysbus_init_mmio(sbd, &s->rstip); /* 6/RBLIT : Raw Blitter */ - memory_region_init_io(&s->rblit, OBJECT(s), &tcx_rblit_ops, s, "tcx.rblit", + memory_region_init_io(&s->rblit, obj, &tcx_rblit_ops, s, "tcx.rblit", TCX_RBLIT_NREGS); sysbus_init_mmio(sbd, &s->rblit); /* 7/TEC : ??? */ - memory_region_init_io(&s->tec, OBJECT(s), &tcx_dummy_ops, s, - "tcx.tec", TCX_TEC_NREGS); + memory_region_init_io(&s->tec, obj, &tcx_dummy_ops, s, "tcx.tec", + TCX_TEC_NREGS); sysbus_init_mmio(sbd, &s->tec); /* 8/CMAP : DAC */ - memory_region_init_io(&s->dac, OBJECT(s), &tcx_dac_ops, s, - "tcx.dac", TCX_DAC_NREGS); + memory_region_init_io(&s->dac, obj, &tcx_dac_ops, s, "tcx.dac", + TCX_DAC_NREGS); sysbus_init_mmio(sbd, &s->dac); /* 9/THC : Cursor */ - memory_region_init_io(&s->thc, OBJECT(s), &tcx_thc_ops, s, "tcx.thc", + memory_region_init_io(&s->thc, obj, &tcx_thc_ops, s, "tcx.thc", TCX_THC_NREGS); sysbus_init_mmio(sbd, &s->thc); /* 11/DHC : ??? */ - memory_region_init_io(&s->dhc, OBJECT(s), &tcx_dummy_ops, s, "tcx.dhc", + memory_region_init_io(&s->dhc, obj, &tcx_dummy_ops, s, "tcx.dhc", TCX_DHC_NREGS); sysbus_init_mmio(sbd, &s->dhc); /* 12/ALT : ??? */ - memory_region_init_io(&s->alt, OBJECT(s), &tcx_dummy_ops, s, "tcx.alt", + memory_region_init_io(&s->alt, obj, &tcx_dummy_ops, s, "tcx.alt", TCX_ALT_NREGS); sysbus_init_mmio(sbd, &s->alt); - - return; } static void tcx_realizefn(DeviceState *dev, Error **errp) diff --git a/hw/dma/pxa2xx_dma.c b/hw/dma/pxa2xx_dma.c index d4501fb4cb..54cdb25a32 100644 --- a/hw/dma/pxa2xx_dma.c +++ b/hw/dma/pxa2xx_dma.c @@ -459,9 +459,8 @@ static int pxa2xx_dma_init(SysBusDevice *sbd) return -1; } - s->chan = g_malloc0(sizeof(PXA2xxDMAChannel) * s->channels); + s->chan = g_new0(PXA2xxDMAChannel, s->channels); - memset(s->chan, 0, sizeof(PXA2xxDMAChannel) * s->channels); for (i = 0; i < s->channels; i ++) s->chan[i].state = DCSR_STOPINTR; diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index efdf165848..0593a3f1f5 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -17,7 +17,7 @@ #include "qemu/host-utils.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" -#include "sysemu/cpus.h" +#include "kvm_i386.h" #include "hw/sysbus.h" #include "hw/kvm/clock.h" @@ -125,21 +125,7 @@ static void kvmclock_vm_state_change(void *opaque, int running, return; } - cpu_synchronize_all_states(); - /* In theory, the cpu_synchronize_all_states() call above wouldn't - * affect the rest of the code, as the VCPU state inside CPUState - * is supposed to always match the VCPU state on the kernel side. - * - * In practice, calling cpu_synchronize_state() too soon will load the - * kernel-side APIC state into X86CPU.apic_state too early, APIC state - * won't be reloaded later because CPUState.vcpu_dirty==true, and - * outdated APIC state may be migrated to another host. - * - * The real fix would be to make sure outdated APIC state is read - * from the kernel again when necessary. While this is not fixed, we - * need the cpu_clean_all_dirty() call below. - */ - cpu_clean_all_dirty(); + kvm_synchronize_all_tsc(); ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); if (ret < 0) { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 3d958bae5b..5e20e07b6f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1616,7 +1616,6 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); @@ -1632,8 +1631,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, goto out; } - pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, - pcmc->inter_dimm_gap, &local_err); + pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err); if (local_err) { goto out; } @@ -1797,9 +1795,9 @@ static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v, return; } if (value > (1ULL << 32)) { - error_set(&error, ERROR_CLASS_GENERIC_ERROR, - "Machine option 'max-ram-below-4g=%"PRIu64 - "' expects size less than or equal to 4G", value); + error_setg(&error, + "Machine option 'max-ram-below-4g=%"PRIu64 + "' expects size less than or equal to 4G", value); error_propagate(errp, error); return; } @@ -1953,7 +1951,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) PCMachineClass *pcmc = PC_MACHINE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); - pcmc->inter_dimm_gap = true; pcmc->get_hotplug_handler = mc->get_hotplug_handler; mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 9d4425a5b9..2e41efe1b4 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -484,10 +484,10 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_5_machine_options(m); + m->hw_version = "2.4.0"; m->alias = NULL; m->is_default = 0; pcmc->broken_reserved_end = true; - pcmc->inter_dimm_gap = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_4); } @@ -498,6 +498,7 @@ DEFINE_I440FX_MACHINE(v2_4, "pc-i440fx-2.4", NULL, static void pc_i440fx_2_3_machine_options(MachineClass *m) { pc_i440fx_2_4_machine_options(m); + m->hw_version = "2.3.0"; m->alias = NULL; m->is_default = 0; SET_MACHINE_COMPAT(m, PC_COMPAT_2_3); @@ -510,6 +511,7 @@ DEFINE_I440FX_MACHINE(v2_3, "pc-i440fx-2.3", pc_compat_2_3, static void pc_i440fx_2_2_machine_options(MachineClass *m) { pc_i440fx_2_3_machine_options(m); + m->hw_version = "2.2.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_2_2); } @@ -520,6 +522,7 @@ DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2, static void pc_i440fx_2_1_machine_options(MachineClass *m) { pc_i440fx_2_2_machine_options(m); + m->hw_version = "2.1.0"; m->default_display = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_1); } @@ -532,6 +535,7 @@ DEFINE_I440FX_MACHINE(v2_1, "pc-i440fx-2.1", pc_compat_2_1, static void pc_i440fx_2_0_machine_options(MachineClass *m) { pc_i440fx_2_1_machine_options(m); + m->hw_version = "2.0.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_2_0); } @@ -542,6 +546,7 @@ DEFINE_I440FX_MACHINE(v2_0, "pc-i440fx-2.0", pc_compat_2_0, static void pc_i440fx_1_7_machine_options(MachineClass *m) { pc_i440fx_2_0_machine_options(m); + m->hw_version = "1.7.0"; m->default_machine_opts = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_1_7); } @@ -553,6 +558,7 @@ DEFINE_I440FX_MACHINE(v1_7, "pc-i440fx-1.7", pc_compat_1_7, static void pc_i440fx_1_6_machine_options(MachineClass *m) { pc_i440fx_1_7_machine_options(m); + m->hw_version = "1.6.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_1_6); } @@ -563,6 +569,7 @@ DEFINE_I440FX_MACHINE(v1_6, "pc-i440fx-1.6", pc_compat_1_6, static void pc_i440fx_1_5_machine_options(MachineClass *m) { pc_i440fx_1_6_machine_options(m); + m->hw_version = "1.5.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_1_5); } @@ -573,6 +580,7 @@ DEFINE_I440FX_MACHINE(v1_5, "pc-i440fx-1.5", pc_compat_1_5, static void pc_i440fx_1_4_machine_options(MachineClass *m) { pc_i440fx_1_5_machine_options(m); + m->hw_version = "1.4.0"; m->hot_add_cpu = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_1_4); } @@ -605,6 +613,7 @@ DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4, static void pc_i440fx_1_3_machine_options(MachineClass *m) { pc_i440fx_1_4_machine_options(m); + m->hw_version = "1.3.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_1_3); } @@ -643,6 +652,7 @@ DEFINE_I440FX_MACHINE(v1_3, "pc-1.3", pc_compat_1_3, static void pc_i440fx_1_2_machine_options(MachineClass *m) { pc_i440fx_1_3_machine_options(m); + m->hw_version = "1.2.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_1_2); } @@ -685,6 +695,7 @@ DEFINE_I440FX_MACHINE(v1_2, "pc-1.2", pc_compat_1_2, static void pc_i440fx_1_1_machine_options(MachineClass *m) { pc_i440fx_1_2_machine_options(m); + m->hw_version = "1.1.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_1_1); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 3744abd397..133bc68fac 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -383,9 +383,9 @@ static void pc_q35_2_4_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_5_machine_options(m); + m->hw_version = "2.4.0"; m->alias = NULL; pcmc->broken_reserved_end = true; - pcmc->inter_dimm_gap = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_4); } @@ -396,6 +396,7 @@ DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, static void pc_q35_2_3_machine_options(MachineClass *m) { pc_q35_2_4_machine_options(m); + m->hw_version = "2.3.0"; m->no_floppy = 0; m->no_tco = 1; m->alias = NULL; @@ -409,6 +410,7 @@ DEFINE_Q35_MACHINE(v2_3, "pc-q35-2.3", pc_compat_2_3, static void pc_q35_2_2_machine_options(MachineClass *m) { pc_q35_2_3_machine_options(m); + m->hw_version = "2.2.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_2_2); } @@ -419,6 +421,7 @@ DEFINE_Q35_MACHINE(v2_2, "pc-q35-2.2", pc_compat_2_2, static void pc_q35_2_1_machine_options(MachineClass *m) { pc_q35_2_2_machine_options(m); + m->hw_version = "2.1.0"; m->default_display = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_1); } @@ -430,6 +433,7 @@ DEFINE_Q35_MACHINE(v2_1, "pc-q35-2.1", pc_compat_2_1, static void pc_q35_2_0_machine_options(MachineClass *m) { pc_q35_2_1_machine_options(m); + m->hw_version = "2.0.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_2_0); } @@ -440,6 +444,7 @@ DEFINE_Q35_MACHINE(v2_0, "pc-q35-2.0", pc_compat_2_0, static void pc_q35_1_7_machine_options(MachineClass *m) { pc_q35_2_0_machine_options(m); + m->hw_version = "1.7.0"; m->default_machine_opts = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_1_7); } @@ -451,6 +456,7 @@ DEFINE_Q35_MACHINE(v1_7, "pc-q35-1.7", pc_compat_1_7, static void pc_q35_1_6_machine_options(MachineClass *m) { pc_q35_machine_options(m); + m->hw_version = "1.6.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_1_6); } @@ -461,6 +467,7 @@ DEFINE_Q35_MACHINE(v1_6, "pc-q35-1.6", pc_compat_1_6, static void pc_q35_1_5_machine_options(MachineClass *m) { pc_q35_1_6_machine_options(m); + m->hw_version = "1.5.0"; SET_MACHINE_COMPAT(m, PC_COMPAT_1_5); } @@ -471,6 +478,7 @@ DEFINE_Q35_MACHINE(v1_5, "pc-q35-1.5", pc_compat_1_5, static void pc_q35_1_4_machine_options(MachineClass *m) { pc_q35_1_5_machine_options(m); + m->hw_version = "1.4.0"; m->hot_add_cpu = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_1_4); } diff --git a/hw/i386/pci-assign-load-rom.c b/hw/i386/pci-assign-load-rom.c index 34a3a7ed7f..e40b586b92 100644 --- a/hw/i386/pci-assign-load-rom.c +++ b/hw/i386/pci-assign-load-rom.c @@ -45,14 +45,10 @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, return NULL; } - if (access(rom_file, F_OK)) { - error_report("pci-assign: Insufficient privileges for %s", rom_file); - return NULL; - } - /* Write "1" to the ROM file to enable it */ fp = fopen(rom_file, "r+"); if (fp == NULL) { + error_report("pci-assign: Cannot open %s: %s", rom_file, strerror(errno)); return NULL; } val = 1; diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 21f76ed86e..dd1912e80d 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -378,17 +378,23 @@ static uint64_t ahci_mem_read(void *opaque, hwaddr addr, unsigned size) int ofst = addr - aligned; uint64_t lo = ahci_mem_read_32(opaque, aligned); uint64_t hi; + uint64_t val; /* if < 8 byte read does not cross 4 byte boundary */ if (ofst + size <= 4) { - return lo >> (ofst * 8); + val = lo >> (ofst * 8); + } else { + g_assert_cmpint(size, >, 1); + + /* If the 64bit read is unaligned, we will produce undefined + * results. AHCI does not support unaligned 64bit reads. */ + hi = ahci_mem_read_32(opaque, aligned + 4); + val = (hi << 32 | lo) >> (ofst * 8); } - g_assert_cmpint(size, >, 1); - /* If the 64bit read is unaligned, we will produce undefined - * results. AHCI does not support unaligned 64bit reads. */ - hi = ahci_mem_read_32(opaque, aligned + 4); - return (hi << 32 | lo) >> (ofst * 8); + DPRINTF(-1, "addr=0x%" HWADDR_PRIx " val=0x%" PRIx64 ", size=%d\n", + addr, val, size); + return val; } @@ -397,6 +403,9 @@ static void ahci_mem_write(void *opaque, hwaddr addr, { AHCIState *s = opaque; + DPRINTF(-1, "addr=0x%" HWADDR_PRIx " val=0x%" PRIx64 ", size=%d\n", + addr, val, size); + /* Only aligned reads are allowed on AHCI */ if (addr & 3) { fprintf(stderr, "ahci: Mis-aligned write to addr 0x" @@ -804,8 +813,21 @@ static int prdt_tbl_entry_size(const AHCI_SG *tbl) return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1; } +/** + * Fetch entries in a guest-provided PRDT and convert it into a QEMU SGlist. + * @ad: The AHCIDevice for whom we are building the SGList. + * @sglist: The SGList target to add PRD entries to. + * @cmd: The AHCI Command Header that describes where the PRDT is. + * @limit: The remaining size of the S/ATA transaction, in bytes. + * @offset: The number of bytes already transferred, in bytes. + * + * The AHCI PRDT can describe up to 256GiB. S/ATA only support transactions of + * up to 32MiB as of ATA8-ACS3 rev 1b, assuming a 512 byte sector size. We stop + * building the sglist from the PRDT as soon as we hit @limit bytes, + * which is <= INT32_MAX/2GiB. + */ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, - AHCICmdHdr *cmd, int64_t limit, int32_t offset) + AHCICmdHdr *cmd, int64_t limit, uint64_t offset) { uint16_t opts = le16_to_cpu(cmd->opts); uint16_t prdtl = le16_to_cpu(cmd->prdtl); @@ -823,14 +845,6 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, IDEBus *bus = &ad->port; BusState *qbus = BUS(bus); - /* - * Note: AHCI PRDT can describe up to 256GiB. SATA/ATA only support - * transactions of up to 32MiB as of ATA8-ACS3 rev 1b, assuming a - * 512 byte sector size. We limit the PRDT in this implementation to - * a reasonably large 2GiB, which can accommodate the maximum transfer - * request for sector sizes up to 32K. - */ - if (!prdtl) { DPRINTF(ad->port_no, "no sg list given by guest: 0x%08x\n", opts); return -1; @@ -880,13 +894,6 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr), MIN(prdt_tbl_entry_size(&tbl[i]), limit - sglist->size)); - if (sglist->size > INT32_MAX) { - error_report("AHCI Physical Region Descriptor Table describes " - "more than 2 GiB."); - qemu_sglist_destroy(sglist); - r = -1; - goto out; - } } } @@ -1427,24 +1434,26 @@ static const IDEDMAOps ahci_dma_ops = { .cmd_done = ahci_cmd_done, }; -void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports) +void ahci_init(AHCIState *s, DeviceState *qdev) { - qemu_irq *irqs; - int i; - - s->as = as; - s->ports = ports; - s->dev = g_new0(AHCIDevice, ports); s->container = qdev; - ahci_reg_init(s); /* XXX BAR size should be 1k, but that breaks, so bump it to 4k for now */ memory_region_init_io(&s->mem, OBJECT(qdev), &ahci_mem_ops, s, "ahci", AHCI_MEM_BAR_SIZE); memory_region_init_io(&s->idp, OBJECT(qdev), &ahci_idp_ops, s, "ahci-idp", 32); +} - irqs = qemu_allocate_irqs(ahci_irq_set, s, s->ports); +void ahci_realize(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports) +{ + qemu_irq *irqs; + int i; + s->as = as; + s->ports = ports; + s->dev = g_new0(AHCIDevice, ports); + ahci_reg_init(s); + irqs = qemu_allocate_irqs(ahci_irq_set, s, s->ports); for (i = 0; i < s->ports; i++) { AHCIDevice *ad = &s->dev[i]; @@ -1639,17 +1648,24 @@ static void sysbus_ahci_reset(DeviceState *dev) ahci_reset(&s->ahci); } -static void sysbus_ahci_realize(DeviceState *dev, Error **errp) +static void sysbus_ahci_init(Object *obj) { - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - SysbusAHCIState *s = SYSBUS_AHCI(dev); + SysbusAHCIState *s = SYSBUS_AHCI(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - ahci_init(&s->ahci, dev, &address_space_memory, s->num_ports); + ahci_init(&s->ahci, DEVICE(obj)); sysbus_init_mmio(sbd, &s->ahci.mem); sysbus_init_irq(sbd, &s->ahci.irq); } +static void sysbus_ahci_realize(DeviceState *dev, Error **errp) +{ + SysbusAHCIState *s = SYSBUS_AHCI(dev); + + ahci_realize(&s->ahci, dev, &address_space_memory, s->num_ports); +} + static Property sysbus_ahci_properties[] = { DEFINE_PROP_UINT32("num-ports", SysbusAHCIState, num_ports, 1), DEFINE_PROP_END_OF_LIST(), @@ -1670,12 +1686,108 @@ static const TypeInfo sysbus_ahci_info = { .name = TYPE_SYSBUS_AHCI, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(SysbusAHCIState), + .instance_init = sysbus_ahci_init, .class_init = sysbus_ahci_class_init, }; +#define ALLWINNER_AHCI_BISTAFR ((0xa0 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_BISTCR ((0xa4 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_BISTFCTR ((0xa8 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_BISTSR ((0xac - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_BISTDECR ((0xb0 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_DIAGNR0 ((0xb4 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_DIAGNR1 ((0xb8 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_OOBR ((0xbc - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_PHYCS0R ((0xc0 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_PHYCS1R ((0xc4 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_PHYCS2R ((0xc8 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_TIMER1MS ((0xe0 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_GPARAM1R ((0xe8 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_GPARAM2R ((0xec - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_PPARAMR ((0xf0 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_TESTR ((0xf4 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_VERSIONR ((0xf8 - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_IDR ((0xfc - ALLWINNER_AHCI_MMIO_OFF) / 4) +#define ALLWINNER_AHCI_RWCR ((0xfc - ALLWINNER_AHCI_MMIO_OFF) / 4) + +static uint64_t allwinner_ahci_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + AllwinnerAHCIState *a = opaque; + uint64_t val = a->regs[addr/4]; + + switch (addr / 4) { + case ALLWINNER_AHCI_PHYCS0R: + val |= 0x2 << 28; + break; + case ALLWINNER_AHCI_PHYCS2R: + val &= ~(0x1 << 24); + break; + } + DPRINTF(-1, "addr=0x%" HWADDR_PRIx " val=0x%" PRIx64 ", size=%d\n", + addr, val, size); + return val; +} + +static void allwinner_ahci_mem_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + AllwinnerAHCIState *a = opaque; + + DPRINTF(-1, "addr=0x%" HWADDR_PRIx " val=0x%" PRIx64 ", size=%d\n", + addr, val, size); + a->regs[addr/4] = val; +} + +static const MemoryRegionOps allwinner_ahci_mem_ops = { + .read = allwinner_ahci_mem_read, + .write = allwinner_ahci_mem_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void allwinner_ahci_init(Object *obj) +{ + SysbusAHCIState *s = SYSBUS_AHCI(obj); + AllwinnerAHCIState *a = ALLWINNER_AHCI(obj); + + memory_region_init_io(&a->mmio, OBJECT(obj), &allwinner_ahci_mem_ops, a, + "allwinner-ahci", ALLWINNER_AHCI_MMIO_SIZE); + memory_region_add_subregion(&s->ahci.mem, ALLWINNER_AHCI_MMIO_OFF, + &a->mmio); +} + +static const VMStateDescription vmstate_allwinner_ahci = { + .name = "allwinner-ahci", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, AllwinnerAHCIState, + ALLWINNER_AHCI_MMIO_SIZE/4), + VMSTATE_END_OF_LIST() + } +}; + +static void allwinner_ahci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_allwinner_ahci; +} + +static const TypeInfo allwinner_ahci_info = { + .name = TYPE_ALLWINNER_AHCI, + .parent = TYPE_SYSBUS_AHCI, + .instance_size = sizeof(AllwinnerAHCIState), + .instance_init = allwinner_ahci_init, + .class_init = allwinner_ahci_class_init, +}; + static void sysbus_ahci_register_types(void) { type_register_static(&sysbus_ahci_info); + type_register_static(&allwinner_ahci_info); } type_init(sysbus_ahci_register_types) diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h index c9b3805415..bc777ed5c2 100644 --- a/hw/ide/ahci.h +++ b/hw/ide/ahci.h @@ -366,7 +366,8 @@ typedef struct SDBFIS { uint32_t payload; } QEMU_PACKED SDBFIS; -void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports); +void ahci_realize(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports); +void ahci_init(AHCIState *s, DeviceState *qdev); void ahci_uninit(AHCIState *s); void ahci_reset(AHCIState *s); @@ -385,4 +386,20 @@ typedef struct SysbusAHCIState { uint32_t num_ports; } SysbusAHCIState; +#define TYPE_ALLWINNER_AHCI "allwinner-ahci" +#define ALLWINNER_AHCI(obj) OBJECT_CHECK(AllwinnerAHCIState, (obj), \ + TYPE_ALLWINNER_AHCI) + +#define ALLWINNER_AHCI_MMIO_OFF 0x80 +#define ALLWINNER_AHCI_MMIO_SIZE 0x80 + +struct AllwinnerAHCIState { + /*< private >*/ + SysbusAHCIState parent_obj; + /*< public >*/ + + MemoryRegion mmio; + uint32_t regs[ALLWINNER_AHCI_MMIO_SIZE/4]; +}; + #endif /* HW_IDE_AHCI_H */ diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 747f46611e..65f8dd457b 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -105,33 +105,99 @@ static void cd_data_to_raw(uint8_t *buf, int lba) memset(buf, 0, 288); } -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size) +static int +cd_read_sector_sync(IDEState *s) { int ret; + block_acct_start(blk_get_stats(s->blk), &s->acct, + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - switch(sector_size) { +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector_sync: lba=%d\n", s->lba); +#endif + + switch (s->cd_sector_size) { case 2048: - block_acct_start(blk_get_stats(s->blk), &s->acct, - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4); - block_acct_done(blk_get_stats(s->blk), &s->acct); + ret = blk_read(s->blk, (int64_t)s->lba << 2, + s->io_buffer, 4); break; case 2352: - block_acct_start(blk_get_stats(s->blk), &s->acct, - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4); - block_acct_done(blk_get_stats(s->blk), &s->acct); - if (ret < 0) - return ret; - cd_data_to_raw(buf, lba); + ret = blk_read(s->blk, (int64_t)s->lba << 2, + s->io_buffer + 16, 4); + if (ret >= 0) { + cd_data_to_raw(s->io_buffer, s->lba); + } break; default: - ret = -EIO; - break; + block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_READ); + return -EIO; + } + + if (ret < 0) { + block_acct_failed(blk_get_stats(s->blk), &s->acct); + } else { + block_acct_done(blk_get_stats(s->blk), &s->acct); + s->lba++; + s->io_buffer_index = 0; } + return ret; } +static void cd_read_sector_cb(void *opaque, int ret) +{ + IDEState *s = opaque; + +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector_cb: lba=%d ret=%d\n", s->lba, ret); +#endif + + if (ret < 0) { + block_acct_failed(blk_get_stats(s->blk), &s->acct); + ide_atapi_io_error(s, ret); + return; + } + + block_acct_done(blk_get_stats(s->blk), &s->acct); + + if (s->cd_sector_size == 2352) { + cd_data_to_raw(s->io_buffer, s->lba); + } + + s->lba++; + s->io_buffer_index = 0; + s->status &= ~BUSY_STAT; + + ide_atapi_cmd_reply_end(s); +} + +static int cd_read_sector(IDEState *s) +{ + if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) { + block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_READ); + return -EINVAL; + } + + s->iov.iov_base = (s->cd_sector_size == 2352) ? + s->io_buffer + 16 : s->io_buffer; + + s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; + qemu_iovec_init_external(&s->qiov, &s->iov, 1); + +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector: lba=%d\n", s->lba); +#endif + + block_acct_start(blk_get_stats(s->blk), &s->acct, + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); + + ide_buffered_readv(s, (int64_t)s->lba << 2, &s->qiov, 4, + cd_read_sector_cb, s); + + s->status |= BUSY_STAT; + return 0; +} + void ide_atapi_cmd_ok(IDEState *s) { s->error = 0; @@ -167,6 +233,17 @@ void ide_atapi_io_error(IDEState *s, int ret) } } +static uint16_t atapi_byte_count_limit(IDEState *s) +{ + uint16_t bcl; + + bcl = s->lcyl | (s->hcyl << 8); + if (bcl == 0xffff) { + return 0xfffe; + } + return bcl; +} + /* The whole ATAPI transfer logic is handled in this function */ void ide_atapi_cmd_reply_end(IDEState *s) { @@ -182,18 +259,27 @@ void ide_atapi_cmd_reply_end(IDEState *s) ide_atapi_cmd_ok(s); ide_set_irq(s->bus); #ifdef DEBUG_IDE_ATAPI - printf("status=0x%x\n", s->status); + printf("end of transfer, status=0x%x\n", s->status); #endif } else { /* see if a new sector must be read */ if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { - ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size); - if (ret < 0) { - ide_atapi_io_error(s, ret); + if (!s->elementary_transfer_size) { + ret = cd_read_sector(s); + if (ret < 0) { + ide_atapi_io_error(s, ret); + } return; + } else { + /* rebuffering within an elementary transfer is + * only possible with a sync request because we + * end up with a race condition otherwise */ + ret = cd_read_sector_sync(s); + if (ret < 0) { + ide_atapi_io_error(s, ret); + return; + } } - s->lba++; - s->io_buffer_index = 0; } if (s->elementary_transfer_size > 0) { /* there are some data left to transmit in this elementary @@ -209,12 +295,10 @@ void ide_atapi_cmd_reply_end(IDEState *s) } else { /* a new transfer is needed */ s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO; - byte_count_limit = s->lcyl | (s->hcyl << 8); + byte_count_limit = atapi_byte_count_limit(s); #ifdef DEBUG_IDE_ATAPI printf("byte_count_limit=%d\n", byte_count_limit); #endif - if (byte_count_limit == 0xffff) - byte_count_limit--; size = s->packet_transfer_size; if (size > byte_count_limit) { /* byte count limit must be even if this case */ @@ -275,7 +359,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, s->io_buffer_index = sector_size; s->cd_sector_size = sector_size; - s->status = READY_STAT | SEEK_STAT; ide_atapi_cmd_reply_end(s); } @@ -351,13 +434,17 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) s->bus->dma->iov.iov_len = n * 4 * 512; qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1); - s->bus->dma->aiocb = blk_aio_readv(s->blk, (int64_t)s->lba << 2, - &s->bus->dma->qiov, n * 4, - ide_atapi_cmd_read_dma_cb, s); + s->bus->dma->aiocb = ide_buffered_readv(s, (int64_t)s->lba << 2, + &s->bus->dma->qiov, n * 4, + ide_atapi_cmd_read_dma_cb, s); return; eot: - block_acct_done(blk_get_stats(s->blk), &s->acct); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->blk), &s->acct); + } else { + block_acct_done(blk_get_stats(s->blk), &s->acct); + } ide_set_inactive(s, false); } @@ -1179,7 +1266,7 @@ enum { NONDATA = 0x04, }; -static const struct { +static const struct AtapiCmd { void (*handler)(IDEState *s, uint8_t *buf); int flags; } atapi_cmd_table[0x100] = { @@ -1206,9 +1293,9 @@ static const struct { void ide_atapi_cmd(IDEState *s) { - uint8_t *buf; + uint8_t *buf = s->io_buffer; + const struct AtapiCmd *cmd = &atapi_cmd_table[s->io_buffer[0]]; - buf = s->io_buffer; #ifdef DEBUG_IDE_ATAPI { int i; @@ -1219,14 +1306,14 @@ void ide_atapi_cmd(IDEState *s) printf("\n"); } #endif + /* * If there's a UNIT_ATTENTION condition pending, only command flagged with * ALLOW_UA are allowed to complete. with other commands getting a CHECK * condition response unless a higher priority status, defined by the drive * here, is pending. */ - if (s->sense_key == UNIT_ATTENTION && - !(atapi_cmd_table[s->io_buffer[0]].flags & ALLOW_UA)) { + if (s->sense_key == UNIT_ATTENTION && !(cmd->flags & ALLOW_UA)) { ide_atapi_cmd_check_status(s); return; } @@ -1237,7 +1324,7 @@ void ide_atapi_cmd(IDEState *s) * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close * states rely on this behavior. */ - if (!(atapi_cmd_table[s->io_buffer[0]].flags & ALLOW_UA) && + if (!(cmd->flags & ALLOW_UA) && !s->tray_open && blk_is_inserted(s->blk) && s->cdrom_changed) { if (s->cdrom_changed == 1) { @@ -1252,7 +1339,7 @@ void ide_atapi_cmd(IDEState *s) } /* Report a Not Ready condition if appropriate for the command */ - if ((atapi_cmd_table[s->io_buffer[0]].flags & CHECK_READY) && + if ((cmd->flags & CHECK_READY) && (!media_present(s) || !blk_is_inserted(s->blk))) { ide_atapi_cmd_error(s, NOT_READY, ASC_MEDIUM_NOT_PRESENT); @@ -1263,10 +1350,9 @@ void ide_atapi_cmd(IDEState *s) * If this is a data-transferring PIO command and BCL is 0, * we abort at the /ATA/ level, not the ATAPI level. * See ATA8 ACS3 section 7.17.6.49 and 7.21.5 */ - if (!(atapi_cmd_table[s->io_buffer[0]].flags & NONDATA)) { + if (cmd->handler && !(cmd->flags & NONDATA)) { /* TODO: Check IDENTIFY data word 125 for default BCL (currently 0) */ - uint16_t byte_count_limit = s->lcyl | (s->hcyl << 8); - if (!(byte_count_limit || s->atapi_dma)) { + if (!(atapi_byte_count_limit(s) || s->atapi_dma)) { /* TODO: Move abort back into core.c and make static inline again */ ide_abort_command(s); return; @@ -1274,8 +1360,8 @@ void ide_atapi_cmd(IDEState *s) } /* Execute the command */ - if (atapi_cmd_table[s->io_buffer[0]].handler) { - atapi_cmd_table[s->io_buffer[0]].handler(s, buf); + if (cmd->handler) { + cmd->handler(s, buf); return; } diff --git a/hw/ide/core.c b/hw/ide/core.c index 317406dca3..da3baab1eb 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -561,6 +561,53 @@ static bool ide_sect_range_ok(IDEState *s, return true; } +static void ide_buffered_readv_cb(void *opaque, int ret) +{ + IDEBufferedRequest *req = opaque; + if (!req->orphaned) { + if (!ret) { + qemu_iovec_from_buf(req->original_qiov, 0, req->iov.iov_base, + req->original_qiov->size); + } + req->original_cb(req->original_opaque, ret); + } + QLIST_REMOVE(req, list); + qemu_vfree(req->iov.iov_base); + g_free(req); +} + +#define MAX_BUFFERED_REQS 16 + +BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque) +{ + BlockAIOCB *aioreq; + IDEBufferedRequest *req; + int c = 0; + + QLIST_FOREACH(req, &s->buffered_requests, list) { + c++; + } + if (c > MAX_BUFFERED_REQS) { + return blk_abort_aio_request(s->blk, cb, opaque, -EIO); + } + + req = g_new0(IDEBufferedRequest, 1); + req->original_qiov = iov; + req->original_cb = cb; + req->original_opaque = opaque; + req->iov.iov_base = qemu_blockalign(blk_bs(s->blk), iov->size); + req->iov.iov_len = iov->size; + qemu_iovec_init_external(&req->qiov, &req->iov, 1); + + aioreq = blk_aio_readv(s->blk, sector_num, &req->qiov, nb_sectors, + ide_buffered_readv_cb, req); + + QLIST_INSERT_HEAD(&s->buffered_requests, req, list); + return aioreq; +} + static void ide_sector_read(IDEState *s); static void ide_sector_read_cb(void *opaque, int ret) @@ -574,7 +621,6 @@ static void ide_sector_read_cb(void *opaque, int ret) if (ret == -ECANCELED) { return; } - block_acct_done(blk_get_stats(s->blk), &s->acct); if (ret != 0) { if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO | IDE_RETRY_READ)) { @@ -582,6 +628,8 @@ static void ide_sector_read_cb(void *opaque, int ret) } } + block_acct_done(blk_get_stats(s->blk), &s->acct); + n = s->nsector; if (n > s->req_nb_sectors) { n = s->req_nb_sectors; @@ -621,6 +669,7 @@ static void ide_sector_read(IDEState *s) if (!ide_sect_range_ok(s, sector_num, n)) { ide_rw_error(s); + block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_READ); return; } @@ -630,8 +679,8 @@ static void ide_sector_read(IDEState *s) block_acct_start(blk_get_stats(s->blk), &s->acct, n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - s->pio_aiocb = blk_aio_readv(s->blk, sector_num, &s->qiov, n, - ide_sector_read_cb, s); + s->pio_aiocb = ide_buffered_readv(s, sector_num, &s->qiov, n, + ide_sector_read_cb, s); } void dma_buf_commit(IDEState *s, uint32_t tx_bytes) @@ -672,6 +721,7 @@ static int ide_handle_rw_error(IDEState *s, int error, int op) assert(s->bus->retry_unit == s->unit); s->bus->error_status = op; } else if (action == BLOCK_ERROR_ACTION_REPORT) { + block_acct_failed(blk_get_stats(s->blk), &s->acct); if (op & IDE_RETRY_DMA) { ide_dma_error(s); } else { @@ -750,6 +800,7 @@ static void ide_dma_cb(void *opaque, int ret) if ((s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) && !ide_sect_range_ok(s, sector_num, n)) { ide_dma_error(s); + block_acct_invalid(blk_get_stats(s->blk), s->acct.type); return; } @@ -826,7 +877,6 @@ static void ide_sector_write_cb(void *opaque, int ret) if (ret == -ECANCELED) { return; } - block_acct_done(blk_get_stats(s->blk), &s->acct); s->pio_aiocb = NULL; s->status &= ~BUSY_STAT; @@ -837,6 +887,8 @@ static void ide_sector_write_cb(void *opaque, int ret) } } + block_acct_done(blk_get_stats(s->blk), &s->acct); + n = s->nsector; if (n > s->req_nb_sectors) { n = s->req_nb_sectors; @@ -887,6 +939,7 @@ static void ide_sector_write(IDEState *s) if (!ide_sect_range_ok(s, sector_num, n)) { ide_rw_error(s); + block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE); return; } @@ -895,7 +948,7 @@ static void ide_sector_write(IDEState *s) qemu_iovec_init_external(&s->qiov, &s->iov, 1); block_acct_start(blk_get_stats(s->blk), &s->acct, - n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); + n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE); s->pio_aiocb = blk_aio_writev(s->blk, sector_num, &s->qiov, n, ide_sector_write_cb, s); } @@ -2312,7 +2365,7 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, if (version) { pstrcpy(s->version, sizeof(s->version), version); } else { - pstrcpy(s->version, sizeof(s->version), qemu_get_version()); + pstrcpy(s->version, sizeof(s->version), qemu_hw_version()); } ide_reset(s); diff --git a/hw/ide/ich.c b/hw/ide/ich.c index 350c7f1c75..16925fa258 100644 --- a/hw/ide/ich.c +++ b/hw/ide/ich.c @@ -97,6 +97,13 @@ static void pci_ich9_reset(DeviceState *dev) ahci_reset(&d->ahci); } +static void pci_ich9_ahci_init(Object *obj) +{ + struct AHCIPCIState *d = ICH_AHCI(obj); + + ahci_init(&d->ahci, DEVICE(obj)); +} + static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp) { struct AHCIPCIState *d; @@ -104,7 +111,7 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error **errp) uint8_t *sata_cap; d = ICH_AHCI(dev); - ahci_init(&d->ahci, DEVICE(dev), pci_get_address_space(dev), 6); + ahci_realize(&d->ahci, DEVICE(dev), pci_get_address_space(dev), 6); pci_config_set_prog_interface(dev->config, AHCI_PROGMODE_MAJOR_REV_1); @@ -171,6 +178,7 @@ static const TypeInfo ich_ahci_info = { .name = TYPE_ICH9_AHCI, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(AHCIPCIState), + .instance_init = pci_ich9_ahci_init, .class_init = ich_ahci_class_init, }; diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 05e93ffe3b..2d1e2d2d2f 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -343,6 +343,16 @@ enum ide_dma_cmd { #define ide_cmd_is_read(s) \ ((s)->dma_cmd == IDE_DMA_READ) +typedef struct IDEBufferedRequest { + QLIST_ENTRY(IDEBufferedRequest) list; + struct iovec iov; + QEMUIOVector qiov; + QEMUIOVector *original_qiov; + BlockCompletionFunc *original_cb; + void *original_opaque; + bool orphaned; +} IDEBufferedRequest; + /* NOTE: IDEState represents in fact one drive */ struct IDEState { IDEBus *bus; @@ -396,8 +406,9 @@ struct IDEState { BlockAIOCB *pio_aiocb; struct iovec iov; QEMUIOVector qiov; + QLIST_HEAD(, IDEBufferedRequest) buffered_requests; /* ATA DMA state */ - int32_t io_buffer_offset; + uint64_t io_buffer_offset; int32_t io_buffer_size; QEMUSGList sg; /* PIO transfer handling */ @@ -572,6 +583,9 @@ void ide_set_inactive(IDEState *s, bool more); BlockAIOCB *ide_issue_trim(BlockBackend *blk, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); /* hw/ide/atapi.c */ void ide_atapi_cmd(IDEState *s); diff --git a/hw/ide/macio.c b/hw/ide/macio.c index 893c9b9bae..3ee962f830 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -286,7 +286,11 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) return; done: - block_acct_done(blk_get_stats(s->blk), &s->acct); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->blk), &s->acct); + } else { + block_acct_done(blk_get_stats(s->blk), &s->acct); + } io->dma_end(opaque); return; @@ -348,7 +352,11 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) done: if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) { - block_acct_done(blk_get_stats(s->blk), &s->acct); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->blk), &s->acct); + } else { + block_acct_done(blk_get_stats(s->blk), &s->acct); + } } io->dma_end(opaque); } diff --git a/hw/ide/pci.c b/hw/ide/pci.c index d31ff885b7..37dbc291da 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -103,13 +103,6 @@ static int32_t bmdma_prepare_buf(IDEDMA *dma, int32_t limit) qemu_sglist_add(&s->sg, bm->cur_prd_addr, sg_len); } - /* Note: We limit the max transfer to be 2GiB. - * This should accommodate the largest ATA transaction - * for LBA48 (65,536 sectors) and 32K sector sizes. */ - if (s->sg.size > INT32_MAX) { - error_report("IDE: sglist describes more than 2GiB."); - break; - } bm->cur_prd_addr += l; bm->cur_prd_len -= l; s->io_buffer_size += l; @@ -240,6 +233,22 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val) /* Ignore writes to SSBM if it keeps the old value */ if ((val & BM_CMD_START) != (bm->cmd & BM_CMD_START)) { if (!(val & BM_CMD_START)) { + /* First invoke the callbacks of all buffered requests + * and flag those requests as orphaned. Ideally there + * are no unbuffered (Scatter Gather DMA Requests or + * write requests) pending and we can avoid to drain. */ + IDEBufferedRequest *req; + IDEState *s = idebus_active_if(bm->bus); + QLIST_FOREACH(req, &s->buffered_requests, list) { + if (!req->orphaned) { +#ifdef DEBUG_IDE + printf("%s: invoking cb %p of buffered request %p with" + " -ECANCELED\n", __func__, req->original_cb, req); +#endif + req->original_cb(req->original_opaque, -ECANCELED); + } + req->orphaned = true; + } /* * We can't cancel Scatter Gather DMA in the middle of the * operation or a partial (not full) DMA transfer would reach @@ -253,6 +262,9 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val) * aio operation with preadv/pwritev. */ if (bm->bus->dma->aiocb) { +#ifdef DEBUG_IDE + printf("%s: draining all remaining requests", __func__); +#endif blk_drain_all(); assert(bm->bus->dma->aiocb == NULL); } diff --git a/hw/input/hid.c b/hw/input/hid.c index 21ebd9e718..e39269fc7a 100644 --- a/hw/input/hid.c +++ b/hw/input/hid.c @@ -119,33 +119,33 @@ static void hid_pointer_event(DeviceState *dev, QemuConsole *src, assert(hs->n < QUEUE_LENGTH); e = &hs->ptr.queue[(hs->head + hs->n) & QUEUE_MASK]; - switch (evt->kind) { + switch (evt->type) { case INPUT_EVENT_KIND_REL: - if (evt->rel->axis == INPUT_AXIS_X) { - e->xdx += evt->rel->value; - } else if (evt->rel->axis == INPUT_AXIS_Y) { - e->ydy += evt->rel->value; + if (evt->u.rel->axis == INPUT_AXIS_X) { + e->xdx += evt->u.rel->value; + } else if (evt->u.rel->axis == INPUT_AXIS_Y) { + e->ydy += evt->u.rel->value; } break; case INPUT_EVENT_KIND_ABS: - if (evt->rel->axis == INPUT_AXIS_X) { - e->xdx = evt->rel->value; - } else if (evt->rel->axis == INPUT_AXIS_Y) { - e->ydy = evt->rel->value; + if (evt->u.rel->axis == INPUT_AXIS_X) { + e->xdx = evt->u.rel->value; + } else if (evt->u.rel->axis == INPUT_AXIS_Y) { + e->ydy = evt->u.rel->value; } break; case INPUT_EVENT_KIND_BTN: - if (evt->btn->down) { - e->buttons_state |= bmap[evt->btn->button]; - if (evt->btn->button == INPUT_BUTTON_WHEEL_UP) { + if (evt->u.btn->down) { + e->buttons_state |= bmap[evt->u.btn->button]; + if (evt->u.btn->button == INPUT_BUTTON_WHEEL_UP) { e->dz--; - } else if (evt->btn->button == INPUT_BUTTON_WHEEL_DOWN) { + } else if (evt->u.btn->button == INPUT_BUTTON_WHEEL_DOWN) { e->dz++; } } else { - e->buttons_state &= ~bmap[evt->btn->button]; + e->buttons_state &= ~bmap[evt->u.btn->button]; } break; @@ -223,8 +223,8 @@ static void hid_keyboard_event(DeviceState *dev, QemuConsole *src, int scancodes[3], i, count; int slot; - count = qemu_input_key_value_to_scancode(evt->key->key, - evt->key->down, + count = qemu_input_key_value_to_scancode(evt->u.key->key, + evt->u.key->down, scancodes); if (hs->n + count > QUEUE_LENGTH) { fprintf(stderr, "usb-kbd: warning: key event queue full\n"); diff --git a/hw/input/ps2.c b/hw/input/ps2.c index fdbe565e62..3d6d4961db 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -183,8 +183,8 @@ static void ps2_keyboard_event(DeviceState *dev, QemuConsole *src, int scancodes[3], i, count; qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); - count = qemu_input_key_value_to_scancode(evt->key->key, - evt->key->down, + count = qemu_input_key_value_to_scancode(evt->u.key->key, + evt->u.key->down, scancodes); for (i = 0; i < count; i++) { ps2_put_keycode(s, scancodes[i]); @@ -393,25 +393,25 @@ static void ps2_mouse_event(DeviceState *dev, QemuConsole *src, if (!(s->mouse_status & MOUSE_STATUS_ENABLED)) return; - switch (evt->kind) { + switch (evt->type) { case INPUT_EVENT_KIND_REL: - if (evt->rel->axis == INPUT_AXIS_X) { - s->mouse_dx += evt->rel->value; - } else if (evt->rel->axis == INPUT_AXIS_Y) { - s->mouse_dy -= evt->rel->value; + if (evt->u.rel->axis == INPUT_AXIS_X) { + s->mouse_dx += evt->u.rel->value; + } else if (evt->u.rel->axis == INPUT_AXIS_Y) { + s->mouse_dy -= evt->u.rel->value; } break; case INPUT_EVENT_KIND_BTN: - if (evt->btn->down) { - s->mouse_buttons |= bmap[evt->btn->button]; - if (evt->btn->button == INPUT_BUTTON_WHEEL_UP) { + if (evt->u.btn->down) { + s->mouse_buttons |= bmap[evt->u.btn->button]; + if (evt->u.btn->button == INPUT_BUTTON_WHEEL_UP) { s->mouse_dz--; - } else if (evt->btn->button == INPUT_BUTTON_WHEEL_DOWN) { + } else if (evt->u.btn->button == INPUT_BUTTON_WHEEL_DOWN) { s->mouse_dz++; } } else { - s->mouse_buttons &= ~bmap[evt->btn->button]; + s->mouse_buttons &= ~bmap[evt->u.btn->button]; } break; diff --git a/hw/input/tsc210x.c b/hw/input/tsc210x.c index fae3385636..1073bbfec6 100644 --- a/hw/input/tsc210x.c +++ b/hw/input/tsc210x.c @@ -1086,9 +1086,7 @@ uWireSlave *tsc2102_init(qemu_irq pint) { TSC210xState *s; - s = (TSC210xState *) - g_malloc0(sizeof(TSC210xState)); - memset(s, 0, sizeof(TSC210xState)); + s = g_new0(TSC210xState, 1); s->x = 160; s->y = 160; s->pressure = 0; @@ -1135,9 +1133,7 @@ uWireSlave *tsc2301_init(qemu_irq penirq, qemu_irq kbirq, qemu_irq dav) { TSC210xState *s; - s = (TSC210xState *) - g_malloc0(sizeof(TSC210xState)); - memset(s, 0, sizeof(TSC210xState)); + s = g_new0(TSC210xState, 1); s->x = 400; s->y = 240; s->pressure = 0; diff --git a/hw/input/virtio-input-hid.c b/hw/input/virtio-input-hid.c index 4d85dad4d1..bdd479cd0a 100644 --- a/hw/input/virtio-input-hid.c +++ b/hw/input/virtio-input-hid.c @@ -191,44 +191,45 @@ static void virtio_input_handle_event(DeviceState *dev, QemuConsole *src, virtio_input_event event; int qcode; - switch (evt->kind) { + switch (evt->type) { case INPUT_EVENT_KIND_KEY: - qcode = qemu_input_key_value_to_qcode(evt->key->key); + qcode = qemu_input_key_value_to_qcode(evt->u.key->key); if (qcode && keymap_qcode[qcode]) { event.type = cpu_to_le16(EV_KEY); event.code = cpu_to_le16(keymap_qcode[qcode]); - event.value = cpu_to_le32(evt->key->down ? 1 : 0); + event.value = cpu_to_le32(evt->u.key->down ? 1 : 0); virtio_input_send(vinput, &event); } else { - if (evt->key->down) { + if (evt->u.key->down) { fprintf(stderr, "%s: unmapped key: %d [%s]\n", __func__, qcode, QKeyCode_lookup[qcode]); } } break; case INPUT_EVENT_KIND_BTN: - if (keymap_button[evt->btn->button]) { + if (keymap_button[evt->u.btn->button]) { event.type = cpu_to_le16(EV_KEY); - event.code = cpu_to_le16(keymap_button[evt->btn->button]); - event.value = cpu_to_le32(evt->btn->down ? 1 : 0); + event.code = cpu_to_le16(keymap_button[evt->u.btn->button]); + event.value = cpu_to_le32(evt->u.btn->down ? 1 : 0); virtio_input_send(vinput, &event); } else { - if (evt->btn->down) { + if (evt->u.btn->down) { fprintf(stderr, "%s: unmapped button: %d [%s]\n", __func__, - evt->btn->button, InputButton_lookup[evt->btn->button]); + evt->u.btn->button, + InputButton_lookup[evt->u.btn->button]); } } break; case INPUT_EVENT_KIND_REL: event.type = cpu_to_le16(EV_REL); - event.code = cpu_to_le16(axismap_rel[evt->rel->axis]); - event.value = cpu_to_le32(evt->rel->value); + event.code = cpu_to_le16(axismap_rel[evt->u.rel->axis]); + event.value = cpu_to_le32(evt->u.rel->value); virtio_input_send(vinput, &event); break; case INPUT_EVENT_KIND_ABS: event.type = cpu_to_le16(EV_ABS); - event.code = cpu_to_le16(axismap_abs[evt->abs->axis]); - event.value = cpu_to_le32(evt->abs->value); + event.code = cpu_to_le16(axismap_abs[evt->u.abs->axis]); + event.value = cpu_to_le32(evt->u.abs->value); virtio_input_send(vinput, &event); break; default: diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index 8bad132d5a..13e297d52e 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -35,8 +35,6 @@ static const uint8_t gic_id[] = { 0x90, 0x13, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1 }; -#define NUM_CPU(s) ((s)->num_cpu) - static inline int gic_get_current_cpu(GICState *s) { if (s->num_cpu > 1) { @@ -64,7 +62,7 @@ void gic_update(GICState *s) int cpu; int cm; - for (cpu = 0; cpu < NUM_CPU(s); cpu++) { + for (cpu = 0; cpu < s->num_cpu; cpu++) { cm = 1 << cpu; s->current_pending[cpu] = 1023; if (!(s->ctlr & (GICD_CTLR_EN_GRP0 | GICD_CTLR_EN_GRP1)) @@ -256,9 +254,9 @@ static void gic_activate_irq(GICState *s, int cpu, int irq) int bitno = preemption_level % 32; if (gic_has_groups(s) && GIC_TEST_GROUP(irq, (1 << cpu))) { - s->nsapr[regno][cpu] &= (1 << bitno); + s->nsapr[regno][cpu] |= (1 << bitno); } else { - s->apr[regno][cpu] &= (1 << bitno); + s->apr[regno][cpu] |= (1 << bitno); } s->running_priority[cpu] = prio; @@ -567,7 +565,7 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs) if (offset == 4) /* Interrupt Controller Type Register */ return ((s->num_irq / 32) - 1) - | ((NUM_CPU(s) - 1) << 5) + | ((s->num_cpu - 1) << 5) | (s->security_extn << 10); if (offset < 0x08) return 0; @@ -1284,7 +1282,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) * GIC v2 defines a larger memory region (0x1000) so this will need * to be extended when we implement A15. */ - for (i = 0; i < NUM_CPU(s); i++) { + for (i = 0; i < s->num_cpu; i++) { s->backref[i] = s; memory_region_init_io(&s->cpuiomem[i+1], OBJECT(s), &gic_cpu_ops, &s->backref[i], "gic_cpu", 0x100); diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 3ec8408618..6fc167e230 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -28,6 +28,7 @@ typedef struct { MemoryRegion gic_iomem_alias; MemoryRegion container; uint32_t num_irq; + qemu_irq sysresetreq; } nvic_state; #define TYPE_NVIC "armv7m_nvic" @@ -348,10 +349,13 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) break; case 0xd0c: /* Application Interrupt/Reset Control. */ if ((value >> 16) == 0x05fa) { + if (value & 4) { + qemu_irq_pulse(s->sysresetreq); + } if (value & 2) { qemu_log_mask(LOG_UNIMP, "VECTCLRACTIVE unimplemented\n"); } - if (value & 5) { + if (value & 1) { qemu_log_mask(LOG_UNIMP, "AIRCR system reset unimplemented\n"); } if (value & 0x700) { @@ -535,11 +539,14 @@ static void armv7m_nvic_instance_init(Object *obj) * value in the GICState struct. */ GICState *s = ARM_GIC_COMMON(obj); + DeviceState *dev = DEVICE(obj); + nvic_state *nvic = NVIC(obj); /* The ARM v7m may have anything from 0 to 496 external interrupt * IRQ lines. We default to 64. Other boards may differ and should * set the num-irq property appropriately. */ s->num_irq = 64; + qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1); } static void armv7m_nvic_class_init(ObjectClass *klass, void *data) diff --git a/hw/lm32/milkymist-hw.h b/hw/lm32/milkymist-hw.h index 8d20cac1db..c8dfb4d2d4 100644 --- a/hw/lm32/milkymist-hw.h +++ b/hw/lm32/milkymist-hw.h @@ -88,7 +88,8 @@ static inline DeviceState *milkymist_pfpu_create(hwaddr base, #ifdef CONFIG_OPENGL #include <X11/Xlib.h> -#include <GL/glx.h> +#include <epoxy/gl.h> +#include <epoxy/glx.h> static const int glx_fbconfig_attr[] = { GLX_GREEN_SIZE, 5, GLX_GREEN_SIZE, 6, diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 2bae994667..d5cdab2707 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -33,8 +33,7 @@ typedef struct pc_dimms_capacity { } pc_dimms_capacity; void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, - MemoryRegion *mr, uint64_t align, bool gap, - Error **errp) + MemoryRegion *mr, uint64_t align, Error **errp) { int slot; MachineState *machine = MACHINE(qdev_get_machine()); @@ -50,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, addr = pc_dimm_get_free_addr(hpms->base, memory_region_size(&hpms->mr), - !addr ? NULL : &addr, align, gap, + !addr ? NULL : &addr, align, memory_region_size(mr), &local_err); if (local_err) { goto out; @@ -180,7 +179,7 @@ int qmp_pc_dimm_device_list(Object *obj, void *opaque) NULL); di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); - info->dimm = di; + info->u.dimm = di; elem->value = info; elem->next = NULL; **prev = elem; @@ -204,9 +203,9 @@ ram_addr_t get_current_ram_size(void) MemoryDeviceInfo *value = info->value; if (value) { - switch (value->kind) { + switch (value->type) { case MEMORY_DEVICE_INFO_KIND_DIMM: - size += value->dimm->size; + size += value->u.dimm->size; break; default: break; @@ -295,8 +294,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque) uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, - uint64_t *hint, uint64_t align, bool gap, - uint64_t size, Error **errp) + uint64_t *hint, uint64_t align, uint64_t size, + Error **errp) { GSList *list = NULL, *item; uint64_t new_addr, ret = 0; @@ -341,15 +340,13 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, goto out; } - if (ranges_overlap(dimm->addr, dimm_size, new_addr, - size + (gap ? 1 : 0))) { + if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) { if (hint) { DeviceState *d = DEVICE(dimm); error_setg(errp, "address range conflicts with '%s'", d->id); goto out; } - new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0), - align); + new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align); } } ret = new_addr; diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c index 10fcca33f8..f76a9fd36b 100644 --- a/hw/mips/gt64xxx_pci.c +++ b/hw/mips/gt64xxx_pci.c @@ -275,7 +275,8 @@ static void check_reserved_space (hwaddr *start, static void gt64120_isd_mapping(GT64120State *s) { - hwaddr start = s->regs[GT_ISD] << 21; + /* Bits 14:0 of ISD map to bits 35:21 of the start address. */ + hwaddr start = ((hwaddr)s->regs[GT_ISD] << 21) & 0xFFFE00000ull; hwaddr length = 0x1000; if (s->ISD_length) { diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index c1f570a79f..91c36baa55 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -901,7 +901,7 @@ static void main_cpu_reset(void *opaque) if (kvm_enabled()) { /* Start running from the bootloader we wrote to end of RAM */ - env->active_tc.PC = 0x40000000 + loaderparams.ram_size; + env->active_tc.PC = 0x40000000 + loaderparams.ram_low_size; } } diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 4aa76ffec9..aeb6b7da74 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -36,6 +36,7 @@ obj-$(CONFIG_OMAP) += omap_sdrc.o obj-$(CONFIG_OMAP) += omap_tap.o obj-$(CONFIG_SLAVIO) += slavio_misc.o obj-$(CONFIG_ZYNQ) += zynq_slcr.o +obj-$(CONFIG_ZYNQ) += zynq-xadc.o obj-$(CONFIG_STM32F2XX_SYSCFG) += stm32f2xx_syscfg.o obj-$(CONFIG_PVPANIC) += pvpanic.o diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 83d7bd3e5f..f73f0c2b17 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -60,8 +60,6 @@ #define IVSHMEM(obj) \ OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) -#define IVSHMEM_MEMDEV_PROP "memdev" - typedef struct Peer { int nb_eventfds; EventNotifier *eventfds; @@ -857,8 +855,8 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) PCI_BASE_ADDRESS_MEM_PREFETCH; if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) { - error_setg(errp, "You must specify either a shmobj, a chardev" - " or a hostmem"); + error_setg(errp, + "You must specify either 'shm', 'chardev' or 'x-memdev'"); return; } @@ -939,6 +937,7 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) memory_region_add_subregion(&s->bar, 0, mr); pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar); } else if (s->server_chr != NULL) { + /* FIXME do not rely on what chr drivers put into filename */ if (strncmp(s->server_chr->filename, "unix:", 5)) { error_setg(errp, "chardev is not a unix client socket"); return; @@ -1181,7 +1180,7 @@ static void ivshmem_init(Object *obj) { IVShmemState *s = IVSHMEM(obj); - object_property_add_link(obj, IVSHMEM_MEMDEV_PROP, TYPE_MEMORY_BACKEND, + object_property_add_link(obj, "x-memdev", TYPE_MEMORY_BACKEND, (Object **)&s->hostmem, ivshmem_check_memdev_is_busy, OBJ_PROP_LINK_UNREF_ON_RELEASE, diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c index 0fd75b376f..9db4c64150 100644 --- a/hw/misc/macio/cuda.c +++ b/hw/misc/macio/cuda.c @@ -57,6 +57,8 @@ #define IER_SET 0x80 /* set bits in IER */ #define IER_CLR 0 /* clear bits in IER */ #define SR_INT 0x04 /* Shift register full/empty */ +#define SR_DATA_INT 0x08 +#define SR_CLOCK_INT 0x10 #define T1_INT 0x40 /* Timer 1 interrupt */ #define T2_INT 0x20 /* Timer 2 interrupt */ @@ -108,6 +110,24 @@ /* CUDA returns time_t's offset from Jan 1, 1904, not 1970 */ #define RTC_OFFSET 2082844800 +/* CUDA registers */ +#define CUDA_REG_B 0x00 +#define CUDA_REG_A 0x01 +#define CUDA_REG_DIRB 0x02 +#define CUDA_REG_DIRA 0x03 +#define CUDA_REG_T1CL 0x04 +#define CUDA_REG_T1CH 0x05 +#define CUDA_REG_T1LL 0x06 +#define CUDA_REG_T1LH 0x07 +#define CUDA_REG_T2CL 0x08 +#define CUDA_REG_T2CH 0x09 +#define CUDA_REG_SR 0x0a +#define CUDA_REG_ACR 0x0b +#define CUDA_REG_PCR 0x0c +#define CUDA_REG_IFR 0x0d +#define CUDA_REG_IER 0x0e +#define CUDA_REG_ANH 0x0f + static void cuda_update(CUDAState *s); static void cuda_receive_packet_from_host(CUDAState *s, const uint8_t *data, int len); @@ -116,47 +136,48 @@ static void cuda_timer_update(CUDAState *s, CUDATimer *ti, static void cuda_update_irq(CUDAState *s) { - if (s->ifr & s->ier & (SR_INT | T1_INT)) { + if (s->ifr & s->ier & (SR_INT | T1_INT | T2_INT)) { qemu_irq_raise(s->irq); } else { qemu_irq_lower(s->irq); } } -static uint64_t get_tb(uint64_t freq) +static uint64_t get_tb(uint64_t time, uint64_t freq) { - return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - freq, get_ticks_per_sec()); + return muldiv64(time, freq, get_ticks_per_sec()); } -static unsigned int get_counter(CUDATimer *s) +static unsigned int get_counter(CUDATimer *ti) { int64_t d; unsigned int counter; uint64_t tb_diff; + uint64_t current_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); /* Reverse of the tb calculation algorithm that Mac OS X uses on bootup. */ - tb_diff = get_tb(s->frequency) - s->load_time; - d = (tb_diff * 0xBF401675E5DULL) / (s->frequency << 24); + tb_diff = get_tb(current_time, ti->frequency) - ti->load_time; + d = (tb_diff * 0xBF401675E5DULL) / (ti->frequency << 24); - if (s->index == 0) { + if (ti->index == 0) { /* the timer goes down from latch to -1 (period of latch + 2) */ - if (d <= (s->counter_value + 1)) { - counter = (s->counter_value - d) & 0xffff; + if (d <= (ti->counter_value + 1)) { + counter = (ti->counter_value - d) & 0xffff; } else { - counter = (d - (s->counter_value + 1)) % (s->latch + 2); - counter = (s->latch - counter) & 0xffff; + counter = (d - (ti->counter_value + 1)) % (ti->latch + 2); + counter = (ti->latch - counter) & 0xffff; } } else { - counter = (s->counter_value - d) & 0xffff; + counter = (ti->counter_value - d) & 0xffff; } return counter; } static void set_counter(CUDAState *s, CUDATimer *ti, unsigned int val) { - CUDA_DPRINTF("T%d.counter=%d\n", 1 + (ti->timer == NULL), val); - ti->load_time = get_tb(s->frequency); + CUDA_DPRINTF("T%d.counter=%d\n", 1 + ti->index, val); + ti->load_time = get_tb(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + s->frequency); ti->counter_value = val; cuda_timer_update(s, ti, ti->load_time); } @@ -199,7 +220,7 @@ static void cuda_timer_update(CUDAState *s, CUDATimer *ti, { if (!ti->timer) return; - if ((s->acr & T1MODE) != T1MODE_CONT) { + if (ti->index == 0 && (s->acr & T1MODE) != T1MODE_CONT) { timer_del(ti->timer); } else { ti->next_irq_time = get_next_irq_time(ti, current_time); @@ -217,6 +238,41 @@ static void cuda_timer1(void *opaque) cuda_update_irq(s); } +static void cuda_timer2(void *opaque) +{ + CUDAState *s = opaque; + CUDATimer *ti = &s->timers[1]; + + cuda_timer_update(s, ti, ti->next_irq_time); + s->ifr |= T2_INT; + cuda_update_irq(s); +} + +static void cuda_set_sr_int(void *opaque) +{ + CUDAState *s = opaque; + + CUDA_DPRINTF("CUDA: %s:%d\n", __func__, __LINE__); + s->ifr |= SR_INT; + cuda_update_irq(s); +} + +static void cuda_delay_set_sr_int(CUDAState *s) +{ + int64_t expire; + + if (s->dirb == 0xff) { + /* Not in Mac OS, fire the IRQ directly */ + cuda_set_sr_int(s); + return; + } + + CUDA_DPRINTF("CUDA: %s:%d\n", __func__, __LINE__); + + expire = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 300 * SCALE_US; + timer_mod(s->sr_delay_timer, expire); +} + static uint32_t cuda_readb(void *opaque, hwaddr addr) { CUDAState *s = opaque; @@ -224,66 +280,68 @@ static uint32_t cuda_readb(void *opaque, hwaddr addr) addr = (addr >> 9) & 0xf; switch(addr) { - case 0: + case CUDA_REG_B: val = s->b; break; - case 1: + case CUDA_REG_A: val = s->a; break; - case 2: + case CUDA_REG_DIRB: val = s->dirb; break; - case 3: + case CUDA_REG_DIRA: val = s->dira; break; - case 4: + case CUDA_REG_T1CL: val = get_counter(&s->timers[0]) & 0xff; s->ifr &= ~T1_INT; cuda_update_irq(s); break; - case 5: + case CUDA_REG_T1CH: val = get_counter(&s->timers[0]) >> 8; cuda_update_irq(s); break; - case 6: + case CUDA_REG_T1LL: val = s->timers[0].latch & 0xff; break; - case 7: + case CUDA_REG_T1LH: /* XXX: check this */ val = (s->timers[0].latch >> 8) & 0xff; break; - case 8: + case CUDA_REG_T2CL: val = get_counter(&s->timers[1]) & 0xff; s->ifr &= ~T2_INT; + cuda_update_irq(s); break; - case 9: + case CUDA_REG_T2CH: val = get_counter(&s->timers[1]) >> 8; break; - case 10: + case CUDA_REG_SR: val = s->sr; - s->ifr &= ~SR_INT; + s->ifr &= ~(SR_INT | SR_CLOCK_INT | SR_DATA_INT); cuda_update_irq(s); break; - case 11: + case CUDA_REG_ACR: val = s->acr; break; - case 12: + case CUDA_REG_PCR: val = s->pcr; break; - case 13: + case CUDA_REG_IFR: val = s->ifr; - if (s->ifr & s->ier) + if (s->ifr & s->ier) { val |= 0x80; + } break; - case 14: + case CUDA_REG_IER: val = s->ier | 0x80; break; default: - case 15: + case CUDA_REG_ANH: val = s->anh; break; } - if (addr != 13 || val != 0) { + if (addr != CUDA_REG_IFR || val != 0) { CUDA_DPRINTF("read: reg=0x%x val=%02x\n", (int)addr, val); } @@ -298,61 +356,65 @@ static void cuda_writeb(void *opaque, hwaddr addr, uint32_t val) CUDA_DPRINTF("write: reg=0x%x val=%02x\n", (int)addr, val); switch(addr) { - case 0: + case CUDA_REG_B: s->b = val; cuda_update(s); break; - case 1: + case CUDA_REG_A: s->a = val; break; - case 2: + case CUDA_REG_DIRB: s->dirb = val; break; - case 3: + case CUDA_REG_DIRA: s->dira = val; break; - case 4: + case CUDA_REG_T1CL: s->timers[0].latch = (s->timers[0].latch & 0xff00) | val; cuda_timer_update(s, &s->timers[0], qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); break; - case 5: + case CUDA_REG_T1CH: s->timers[0].latch = (s->timers[0].latch & 0xff) | (val << 8); s->ifr &= ~T1_INT; set_counter(s, &s->timers[0], s->timers[0].latch); break; - case 6: + case CUDA_REG_T1LL: s->timers[0].latch = (s->timers[0].latch & 0xff00) | val; cuda_timer_update(s, &s->timers[0], qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); break; - case 7: + case CUDA_REG_T1LH: s->timers[0].latch = (s->timers[0].latch & 0xff) | (val << 8); s->ifr &= ~T1_INT; cuda_timer_update(s, &s->timers[0], qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); break; - case 8: - s->timers[1].latch = val; - set_counter(s, &s->timers[1], val); + case CUDA_REG_T2CL: + s->timers[1].latch = (s->timers[1].latch & 0xff00) | val; break; - case 9: - set_counter(s, &s->timers[1], (val << 8) | s->timers[1].latch); + case CUDA_REG_T2CH: + /* To ensure T2 generates an interrupt on zero crossing with the + common timer code, write the value directly from the latch to + the counter */ + s->timers[1].latch = (s->timers[1].latch & 0xff) | (val << 8); + s->ifr &= ~T2_INT; + set_counter(s, &s->timers[1], s->timers[1].latch); break; - case 10: + case CUDA_REG_SR: s->sr = val; break; - case 11: + case CUDA_REG_ACR: s->acr = val; cuda_timer_update(s, &s->timers[0], qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); cuda_update(s); break; - case 12: + case CUDA_REG_PCR: s->pcr = val; break; - case 13: + case CUDA_REG_IFR: /* reset bits */ s->ifr &= ~val; cuda_update_irq(s); break; - case 14: + case CUDA_REG_IER: if (val & IER_SET) { /* set bits */ s->ier |= val & 0x7f; @@ -363,7 +425,7 @@ static void cuda_writeb(void *opaque, hwaddr addr, uint32_t val) cuda_update_irq(s); break; default: - case 15: + case CUDA_REG_ANH: s->anh = val; break; } @@ -384,8 +446,7 @@ static void cuda_update(CUDAState *s) if (s->data_out_index < sizeof(s->data_out)) { CUDA_DPRINTF("send: %02x\n", s->sr); s->data_out[s->data_out_index++] = s->sr; - s->ifr |= SR_INT; - cuda_update_irq(s); + cuda_delay_set_sr_int(s); } } } else { @@ -398,8 +459,7 @@ static void cuda_update(CUDAState *s) if (s->data_in_index >= s->data_in_size) { s->b = (s->b | TREQ); } - s->ifr |= SR_INT; - cuda_update_irq(s); + cuda_delay_set_sr_int(s); } } } @@ -411,15 +471,13 @@ static void cuda_update(CUDAState *s) s->b = (s->b | TREQ); else s->b = (s->b & ~TREQ); - s->ifr |= SR_INT; - cuda_update_irq(s); + cuda_delay_set_sr_int(s); } else { if (!(s->last_b & TIP)) { /* handle end of host to cuda transfer */ packet_received = (s->data_out_index > 0); /* always an IRQ at the end of transfer */ - s->ifr |= SR_INT; - cuda_update_irq(s); + cuda_delay_set_sr_int(s); } /* signal if there is data to read */ if (s->data_in_index < s->data_in_size) { @@ -456,8 +514,7 @@ static void cuda_send_packet_to_host(CUDAState *s, s->data_in_size = len; s->data_in_index = 0; cuda_update(s); - s->ifr |= SR_INT; - cuda_update_irq(s); + cuda_delay_set_sr_int(s); } static void cuda_adb_poll(void *opaque) @@ -480,7 +537,7 @@ static void cuda_adb_poll(void *opaque) static void cuda_receive_packet(CUDAState *s, const uint8_t *data, int len) { - uint8_t obuf[16]; + uint8_t obuf[16] = { CUDA_PACKET, 0, data[0] }; int autopoll; uint32_t ti; @@ -497,23 +554,18 @@ static void cuda_receive_packet(CUDAState *s, timer_del(s->adb_poll_timer); } } - obuf[0] = CUDA_PACKET; - obuf[1] = data[1]; - cuda_send_packet_to_host(s, obuf, 2); + cuda_send_packet_to_host(s, obuf, 3); + break; + case CUDA_GET_6805_ADDR: + cuda_send_packet_to_host(s, obuf, 3); break; case CUDA_SET_TIME: ti = (((uint32_t)data[1]) << 24) + (((uint32_t)data[2]) << 16) + (((uint32_t)data[3]) << 8) + data[4]; s->tick_offset = ti - (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / get_ticks_per_sec()); - obuf[0] = CUDA_PACKET; - obuf[1] = 0; - obuf[2] = 0; cuda_send_packet_to_host(s, obuf, 3); break; case CUDA_GET_TIME: ti = s->tick_offset + (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / get_ticks_per_sec()); - obuf[0] = CUDA_PACKET; - obuf[1] = 0; - obuf[2] = 0; obuf[3] = ti >> 24; obuf[4] = ti >> 16; obuf[5] = ti >> 8; @@ -524,22 +576,34 @@ static void cuda_receive_packet(CUDAState *s, case CUDA_SET_DEVICE_LIST: case CUDA_SET_AUTO_RATE: case CUDA_SET_POWER_MESSAGES: - obuf[0] = CUDA_PACKET; - obuf[1] = 0; - cuda_send_packet_to_host(s, obuf, 2); + cuda_send_packet_to_host(s, obuf, 3); break; case CUDA_POWERDOWN: - obuf[0] = CUDA_PACKET; - obuf[1] = 0; - cuda_send_packet_to_host(s, obuf, 2); + cuda_send_packet_to_host(s, obuf, 3); qemu_system_shutdown_request(); break; case CUDA_RESET_SYSTEM: - obuf[0] = CUDA_PACKET; - obuf[1] = 0; - cuda_send_packet_to_host(s, obuf, 2); + cuda_send_packet_to_host(s, obuf, 3); qemu_system_reset_request(); break; + case CUDA_COMBINED_FORMAT_IIC: + obuf[0] = ERROR_PACKET; + obuf[1] = 0x5; + obuf[2] = CUDA_PACKET; + obuf[3] = data[0]; + cuda_send_packet_to_host(s, obuf, 4); + break; + case CUDA_GET_SET_IIC: + if (len == 4) { + cuda_send_packet_to_host(s, obuf, 3); + } else { + obuf[0] = ERROR_PACKET; + obuf[1] = 0x2; + obuf[2] = CUDA_PACKET; + obuf[3] = data[0]; + cuda_send_packet_to_host(s, obuf, 4); + } + break; default: break; } @@ -560,19 +624,21 @@ static void cuda_receive_packet_from_host(CUDAState *s, switch(data[0]) { case ADB_PACKET: { - uint8_t obuf[ADB_MAX_OUT_LEN + 2]; + uint8_t obuf[ADB_MAX_OUT_LEN + 3]; int olen; olen = adb_request(&s->adb_bus, obuf + 2, data + 1, len - 1); if (olen > 0) { obuf[0] = ADB_PACKET; obuf[1] = 0x00; + cuda_send_packet_to_host(s, obuf, olen + 2); } else { /* error */ obuf[0] = ADB_PACKET; obuf[1] = -olen; + obuf[2] = data[1]; olen = 0; + cuda_send_packet_to_host(s, obuf, olen + 3); } - cuda_send_packet_to_host(s, obuf, olen + 2); } break; case CUDA_PACKET: @@ -671,7 +737,7 @@ static void cuda_reset(DeviceState *dev) s->b = 0; s->a = 0; - s->dirb = 0; + s->dirb = 0xff; s->dira = 0; s->sr = 0; s->acr = 0; @@ -688,8 +754,9 @@ static void cuda_reset(DeviceState *dev) s->timers[0].latch = 0xffff; set_counter(s, &s->timers[0], 0xffff); - s->timers[1].latch = 0; - set_counter(s, &s->timers[1], 0xffff); + s->timers[1].latch = 0xffff; + + s->sr_delay_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_set_sr_int, s); } static void cuda_realizefn(DeviceState *dev, Error **errp) @@ -699,7 +766,8 @@ static void cuda_realizefn(DeviceState *dev, Error **errp) s->timers[0].timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_timer1, s); s->timers[0].frequency = s->frequency; - s->timers[1].frequency = s->frequency; + s->timers[1].timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_timer2, s); + s->timers[1].frequency = (SCALE_US * 6000) / 4700; qemu_get_timedate(&tm, 0); s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET; diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index 779683cc07..5ee8f022a5 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -557,7 +557,6 @@ void DBDMA_register_channel(void *dbdma, int nchan, qemu_irq irq, DBDMA_DPRINTF("DBDMA_register_channel 0x%x\n", nchan); ch->irq = irq; - ch->channel = nchan; ch->rw = rw; ch->flush = flush; ch->io.opaque = opaque; @@ -753,6 +752,7 @@ void* DBDMA_init (MemoryRegion **dbdma_mem) for (i = 0; i < DBDMA_CHANNELS; i++) { DBDMA_io *io = &s->channels[i].io; qemu_iovec_init(&io->iov, 1); + s->channels[i].channel = i; } memory_region_init_io(&s->mem, NULL, &dbdma_ops, s, "dbdma", 0x1000); diff --git a/hw/misc/zynq-xadc.c b/hw/misc/zynq-xadc.c new file mode 100644 index 0000000000..1a32595455 --- /dev/null +++ b/hw/misc/zynq-xadc.c @@ -0,0 +1,302 @@ +/* + * ADC registers for Xilinx Zynq Platform + * + * Copyright (c) 2015 Guenter Roeck + * Based on hw/misc/zynq_slcr.c, written by Michal Simek + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/hw.h" +#include "hw/misc/zynq-xadc.h" +#include "qemu/timer.h" +#include "sysemu/sysemu.h" + +enum { + CFG = 0x000 / 4, + INT_STS, + INT_MASK, + MSTS, + CMDFIFO, + RDFIFO, + MCTL, +}; + +#define CFG_ENABLE BIT(31) +#define CFG_CFIFOTH_SHIFT 20 +#define CFG_CFIFOTH_LENGTH 4 +#define CFG_DFIFOTH_SHIFT 16 +#define CFG_DFIFOTH_LENGTH 4 +#define CFG_WEDGE BIT(13) +#define CFG_REDGE BIT(12) +#define CFG_TCKRATE_SHIFT 8 +#define CFG_TCKRATE_LENGTH 2 + +#define CFG_TCKRATE_DIV(x) (0x1 << (x - 1)) + +#define CFG_IGAP_SHIFT 0 +#define CFG_IGAP_LENGTH 5 + +#define INT_CFIFO_LTH BIT(9) +#define INT_DFIFO_GTH BIT(8) +#define INT_OT BIT(7) +#define INT_ALM_SHIFT 0 +#define INT_ALM_LENGTH 7 +#define INT_ALM_MASK (((1 << INT_ALM_LENGTH) - 1) << INT_ALM_SHIFT) + +#define INT_ALL (INT_CFIFO_LTH | INT_DFIFO_GTH | INT_OT | INT_ALM_MASK) + +#define MSTS_CFIFO_LVL_SHIFT 16 +#define MSTS_CFIFO_LVL_LENGTH 4 +#define MSTS_DFIFO_LVL_SHIFT 12 +#define MSTS_DFIFO_LVL_LENGTH 4 +#define MSTS_CFIFOF BIT(11) +#define MSTS_CFIFOE BIT(10) +#define MSTS_DFIFOF BIT(9) +#define MSTS_DFIFOE BIT(8) +#define MSTS_OT BIT(7) +#define MSTS_ALM_SHIFT 0 +#define MSTS_ALM_LENGTH 7 + +#define MCTL_RESET BIT(4) + +#define CMD_NOP 0x00 +#define CMD_READ 0x01 +#define CMD_WRITE 0x02 + +static void zynq_xadc_update_ints(ZynqXADCState *s) +{ + + /* We are fast, commands are actioned instantly so the CFIFO is always + * empty (and below threshold). + */ + s->regs[INT_STS] |= INT_CFIFO_LTH; + + if (s->xadc_dfifo_entries > + extract32(s->regs[CFG], CFG_DFIFOTH_SHIFT, CFG_DFIFOTH_LENGTH)) { + s->regs[INT_STS] |= INT_DFIFO_GTH; + } + + qemu_set_irq(s->qemu_irq, !!(s->regs[INT_STS] & ~s->regs[INT_MASK])); +} + +static void zynq_xadc_reset(DeviceState *d) +{ + ZynqXADCState *s = ZYNQ_XADC(d); + + s->regs[CFG] = 0x14 << CFG_IGAP_SHIFT | + CFG_TCKRATE_DIV(4) << CFG_TCKRATE_SHIFT | CFG_REDGE; + s->regs[INT_STS] = INT_CFIFO_LTH; + s->regs[INT_MASK] = 0xffffffff; + s->regs[CMDFIFO] = 0; + s->regs[RDFIFO] = 0; + s->regs[MCTL] = MCTL_RESET; + + memset(s->xadc_regs, 0, sizeof(s->xadc_regs)); + memset(s->xadc_dfifo, 0, sizeof(s->xadc_dfifo)); + s->xadc_dfifo_entries = 0; + + zynq_xadc_update_ints(s); +} + +static uint16_t xadc_pop_dfifo(ZynqXADCState *s) +{ + uint16_t rv = s->xadc_dfifo[0]; + int i; + + if (s->xadc_dfifo_entries > 0) { + s->xadc_dfifo_entries--; + } + for (i = 0; i < s->xadc_dfifo_entries; i++) { + s->xadc_dfifo[i] = s->xadc_dfifo[i + 1]; + } + s->xadc_dfifo[s->xadc_dfifo_entries] = 0; + zynq_xadc_update_ints(s); + return rv; +} + +static void xadc_push_dfifo(ZynqXADCState *s, uint16_t regval) +{ + if (s->xadc_dfifo_entries < ZYNQ_XADC_FIFO_DEPTH) { + s->xadc_dfifo[s->xadc_dfifo_entries++] = s->xadc_read_reg_previous; + } + s->xadc_read_reg_previous = regval; + zynq_xadc_update_ints(s); +} + +static bool zynq_xadc_check_offset(hwaddr offset, bool rnw) +{ + switch (offset) { + case CFG: + case INT_MASK: + case INT_STS: + case MCTL: + return true; + case RDFIFO: + case MSTS: + return rnw; /* read only */ + case CMDFIFO: + return !rnw; /* write only */ + default: + return false; + } +} + +static uint64_t zynq_xadc_read(void *opaque, hwaddr offset, unsigned size) +{ + ZynqXADCState *s = opaque; + int reg = offset / 4; + uint32_t rv = 0; + + if (!zynq_xadc_check_offset(reg, true)) { + qemu_log_mask(LOG_GUEST_ERROR, "zynq_xadc: Invalid read access to " + "addr %" HWADDR_PRIx "\n", offset); + return 0; + } + + switch (reg) { + case CFG: + case INT_MASK: + case INT_STS: + case MCTL: + rv = s->regs[reg]; + break; + case MSTS: + rv = MSTS_CFIFOE; + rv |= s->xadc_dfifo_entries << MSTS_DFIFO_LVL_SHIFT; + if (!s->xadc_dfifo_entries) { + rv |= MSTS_DFIFOE; + } else if (s->xadc_dfifo_entries == ZYNQ_XADC_FIFO_DEPTH) { + rv |= MSTS_DFIFOF; + } + break; + case RDFIFO: + rv = xadc_pop_dfifo(s); + break; + } + return rv; +} + +static void zynq_xadc_write(void *opaque, hwaddr offset, uint64_t val, + unsigned size) +{ + ZynqXADCState *s = (ZynqXADCState *)opaque; + int reg = offset / 4; + int xadc_reg; + int xadc_cmd; + int xadc_data; + + if (!zynq_xadc_check_offset(reg, false)) { + qemu_log_mask(LOG_GUEST_ERROR, "zynq_xadc: Invalid write access " + "to addr %" HWADDR_PRIx "\n", offset); + return; + } + + switch (reg) { + case CFG: + s->regs[CFG] = val; + break; + case INT_STS: + s->regs[INT_STS] &= ~val; + break; + case INT_MASK: + s->regs[INT_MASK] = val & INT_ALL; + break; + case CMDFIFO: + xadc_cmd = extract32(val, 26, 4); + xadc_reg = extract32(val, 16, 10); + xadc_data = extract32(val, 0, 16); + + if (s->regs[MCTL] & MCTL_RESET) { + qemu_log_mask(LOG_GUEST_ERROR, "zynq_xadc: Sending command " + "while comm channel held in reset: %" PRIx32 "\n", + (uint32_t) val); + break; + } + + if (xadc_reg > ZYNQ_XADC_NUM_ADC_REGS && xadc_cmd != CMD_NOP) { + qemu_log_mask(LOG_GUEST_ERROR, "read/write op to invalid xadc " + "reg 0x%x\n", xadc_reg); + break; + } + + switch (xadc_cmd) { + case CMD_READ: + xadc_push_dfifo(s, s->xadc_regs[xadc_reg]); + break; + case CMD_WRITE: + s->xadc_regs[xadc_reg] = xadc_data; + /* fallthrough */ + case CMD_NOP: + xadc_push_dfifo(s, 0); + break; + } + break; + case MCTL: + s->regs[MCTL] = val & 0x00fffeff; + break; + } + zynq_xadc_update_ints(s); +} + +static const MemoryRegionOps xadc_ops = { + .read = zynq_xadc_read, + .write = zynq_xadc_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void zynq_xadc_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + ZynqXADCState *s = ZYNQ_XADC(obj); + + memory_region_init_io(&s->iomem, obj, &xadc_ops, s, "zynq-xadc", + ZYNQ_XADC_MMIO_SIZE); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->qemu_irq); +} + +static const VMStateDescription vmstate_zynq_xadc = { + .name = "zynq-xadc", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, ZynqXADCState, ZYNQ_XADC_NUM_IO_REGS), + VMSTATE_UINT16_ARRAY(xadc_regs, ZynqXADCState, + ZYNQ_XADC_NUM_ADC_REGS), + VMSTATE_UINT16_ARRAY(xadc_dfifo, ZynqXADCState, + ZYNQ_XADC_FIFO_DEPTH), + VMSTATE_UINT16(xadc_read_reg_previous, ZynqXADCState), + VMSTATE_UINT16(xadc_dfifo_entries, ZynqXADCState), + VMSTATE_END_OF_LIST() + } +}; + +static void zynq_xadc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_zynq_xadc; + dc->reset = zynq_xadc_reset; +} + +static const TypeInfo zynq_xadc_info = { + .class_init = zynq_xadc_class_init, + .name = TYPE_ZYNQ_XADC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ZynqXADCState), + .instance_init = zynq_xadc_init, +}; + +static void zynq_xadc_register_types(void) +{ + type_register_static(&zynq_xadc_info); +} + +type_init(zynq_xadc_register_types) diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 153124485d..bec06e90ec 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -37,24 +37,26 @@ #include "e1000_regs.h" +static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + #define E1000_DEBUG #ifdef E1000_DEBUG enum { - DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT, - DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM, - DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR, + DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT, + DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM, + DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR, DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET, }; -#define DBGBIT(x) (1<<DEBUG_##x) +#define DBGBIT(x) (1<<DEBUG_##x) static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL); -#define DBGOUT(what, fmt, ...) do { \ +#define DBGOUT(what, fmt, ...) do { \ if (debugflags & DBGBIT(what)) \ fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \ } while (0) #else -#define DBGOUT(what, fmt, ...) do {} while (0) +#define DBGOUT(what, fmt, ...) do {} while (0) #endif #define IOPORT_SIZE 0x40 @@ -118,7 +120,7 @@ typedef struct E1000State_st { } tx; struct { - uint32_t val_in; // shifted in from guest driver + uint32_t val_in; /* shifted in from guest driver */ uint16_t bitnum_in; uint16_t bitnum_out; uint16_t reading; @@ -135,11 +137,15 @@ typedef struct E1000State_st { /* Compatibility flags for migration to/from qemu 1.3.0 and older */ #define E1000_FLAG_AUTONEG_BIT 0 #define E1000_FLAG_MIT_BIT 1 +#define E1000_FLAG_MAC_BIT 2 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT) #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT) +#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT) uint32_t compat_flags; } E1000State; +#define chkflag(x) (s->compat_flags & E1000_FLAG_##x) + typedef struct E1000BaseClass { PCIDeviceClass parent_class; uint16_t phy_id2; @@ -155,20 +161,36 @@ typedef struct E1000BaseClass { #define E1000_DEVICE_GET_CLASS(obj) \ OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE) -#define defreg(x) x = (E1000_##x>>2) +#define defreg(x) x = (E1000_##x>>2) enum { - defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC), - defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC), - defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC), - defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH), - defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT), - defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH), - defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT), - defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL), - defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), - defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA), - defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV), - defreg(ITR), + defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC), + defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC), + defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC), + defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH), + defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT), + defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH), + defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT), + defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL), + defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), + defreg(RA), defreg(MTA), defreg(CRCERRS), defreg(VFTA), + defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV), + defreg(ITR), defreg(FCRUC), defreg(TDFH), defreg(TDFT), + defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(RDFH), + defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC), + defreg(IPAV), defreg(WUC), defreg(WUS), defreg(AIT), + defreg(IP6AT), defreg(IP4AT), defreg(FFLT), defreg(FFMT), + defreg(FFVT), defreg(WUPM), defreg(PBM), defreg(SCC), + defreg(ECOL), defreg(MCC), defreg(LATECOL), defreg(COLC), + defreg(DC), defreg(TNCRS), defreg(SEC), defreg(CEXTERR), + defreg(RLEC), defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), + defreg(XOFFTXC), defreg(RFC), defreg(RJC), defreg(RNBC), + defreg(TSCTFC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC), + defreg(RUC), defreg(ROC), defreg(GORCL), defreg(GORCH), + defreg(GOTCL), defreg(GOTCH), defreg(BPRC), defreg(MPRC), + defreg(TSCTC), defreg(PRC64), defreg(PRC127), defreg(PRC255), + defreg(PRC511), defreg(PRC1023), defreg(PRC1522), defreg(PTC64), + defreg(PTC127), defreg(PTC255), defreg(PTC511), defreg(PTC1023), + defreg(PTC1522), defreg(MPTC), defreg(BPTC) }; static void @@ -193,8 +215,7 @@ e1000_link_up(E1000State *s) static bool have_autoneg(E1000State *s) { - return (s->compat_flags & E1000_FLAG_AUTONEG) && - (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN); + return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN); } static void @@ -226,18 +247,18 @@ enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) }; enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W }; static const char phy_regcap[0x20] = { - [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW, - [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW, - [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW, - [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R, - [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R, - [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R, + [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW, + [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW, + [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW, + [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R, + [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R, + [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R, [PHY_AUTONEG_EXP] = PHY_R, }; /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */ static const uint16_t phy_reg_init[] = { - [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB | + [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB | MII_CR_FULL_DUPLEX | MII_CR_AUTO_NEG_EN, @@ -264,15 +285,15 @@ static const uint16_t phy_reg_init[] = { }; static const uint32_t mac_reg_init[] = { - [PBA] = 0x00100030, - [LEDCTL] = 0x602, - [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 | + [PBA] = 0x00100030, + [LEDCTL] = 0x602, + [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 | E1000_CTRL_SPD_1000 | E1000_CTRL_SLU, - [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE | + [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE | E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK | E1000_STATUS_SPEED_1000 | E1000_STATUS_FD | E1000_STATUS_LU, - [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN | + [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN | E1000_MANC_ARP_EN | E1000_MANC_0298_EN | E1000_MANC_RMCP_EN, }; @@ -319,7 +340,7 @@ set_interrupt_cause(E1000State *s, int index, uint32_t val) if (s->mit_timer_on) { return; } - if (s->compat_flags & E1000_FLAG_MIT) { + if (chkflag(MIT)) { /* Compute the next mitigation delay according to pending * interrupts and the current values of RADV (provided * RDTR!=0), TADV and ITR. @@ -515,17 +536,19 @@ set_eecd(E1000State *s, int index, uint32_t val) s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS | E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ); - if (!(E1000_EECD_CS & val)) // CS inactive; nothing to do - return; - if (E1000_EECD_CS & (val ^ oldval)) { // CS rise edge; reset state - s->eecd_state.val_in = 0; - s->eecd_state.bitnum_in = 0; - s->eecd_state.bitnum_out = 0; - s->eecd_state.reading = 0; + if (!(E1000_EECD_CS & val)) { /* CS inactive; nothing to do */ + return; } - if (!(E1000_EECD_SK & (val ^ oldval))) // no clock edge + if (E1000_EECD_CS & (val ^ oldval)) { /* CS rise edge; reset state */ + s->eecd_state.val_in = 0; + s->eecd_state.bitnum_in = 0; + s->eecd_state.bitnum_out = 0; + s->eecd_state.reading = 0; + } + if (!(E1000_EECD_SK & (val ^ oldval))) { /* no clock edge */ return; - if (!(E1000_EECD_SK & val)) { // falling edge + } + if (!(E1000_EECD_SK & val)) { /* falling edge */ s->eecd_state.bitnum_out++; return; } @@ -570,6 +593,56 @@ putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse) } } +static inline void +inc_reg_if_not_full(E1000State *s, int index) +{ + if (s->mac_reg[index] != 0xffffffff) { + s->mac_reg[index]++; + } +} + +static inline void +inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr) +{ + if (!memcmp(arr, bcast, sizeof bcast)) { + inc_reg_if_not_full(s, BPTC); + } else if (arr[0] & 1) { + inc_reg_if_not_full(s, MPTC); + } +} + +static void +grow_8reg_if_not_full(E1000State *s, int index, int size) +{ + uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32; + + if (sum + size < sum) { + sum = ~0ULL; + } else { + sum += size; + } + s->mac_reg[index] = sum; + s->mac_reg[index+1] = sum >> 32; +} + +static void +increase_size_stats(E1000State *s, const int *size_regs, int size) +{ + if (size > 1023) { + inc_reg_if_not_full(s, size_regs[5]); + } else if (size > 511) { + inc_reg_if_not_full(s, size_regs[4]); + } else if (size > 255) { + inc_reg_if_not_full(s, size_regs[3]); + } else if (size > 127) { + inc_reg_if_not_full(s, size_regs[2]); + } else if (size > 64) { + inc_reg_if_not_full(s, size_regs[1]); + } else if (size == 64) { + inc_reg_if_not_full(s, size_regs[0]); + } +} + static inline int vlan_enabled(E1000State *s) { @@ -607,40 +680,49 @@ fcs_len(E1000State *s) static void e1000_send_packet(E1000State *s, const uint8_t *buf, int size) { + static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511, + PTC1023, PTC1522 }; + NetClientState *nc = qemu_get_queue(s->nic); if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) { nc->info->receive(nc, buf, size); } else { qemu_send_packet(nc, buf, size); } + inc_tx_bcast_or_mcast_count(s, buf); + increase_size_stats(s, PTCregs, size); } static void xmit_seg(E1000State *s) { uint16_t len, *sp; - unsigned int frames = s->tx.tso_frames, css, sofar, n; + unsigned int frames = s->tx.tso_frames, css, sofar; struct e1000_tx *tp = &s->tx; if (tp->tse && tp->cptse) { css = tp->ipcss; DBGOUT(TXSUM, "frames %d size %d ipcss %d\n", frames, tp->size, css); - if (tp->ip) { // IPv4 + if (tp->ip) { /* IPv4 */ stw_be_p(tp->data+css+2, tp->size - css); stw_be_p(tp->data+css+4, - be16_to_cpup((uint16_t *)(tp->data+css+4))+frames); - } else // IPv6 + be16_to_cpup((uint16_t *)(tp->data+css+4))+frames); + } else { /* IPv6 */ stw_be_p(tp->data+css+4, tp->size - css); + } css = tp->tucss; len = tp->size - css; DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len); if (tp->tcp) { sofar = frames * tp->mss; stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */ - if (tp->paylen - sofar > tp->mss) - tp->data[css + 13] &= ~9; // PSH, FIN - } else // UDP + if (tp->paylen - sofar > tp->mss) { + tp->data[css + 13] &= ~9; /* PSH, FIN */ + } else if (frames) { + inc_reg_if_not_full(s, TSCTC); + } + } else /* UDP */ stw_be_p(tp->data+css+4, len); if (tp->sum_needed & E1000_TXD_POPTS_TXSM) { unsigned int phsum; @@ -662,13 +744,15 @@ xmit_seg(E1000State *s) memmove(tp->data, tp->data + 4, 8); memcpy(tp->data + 8, tp->vlan_header, 4); e1000_send_packet(s, tp->vlan, tp->size + 4); - } else + } else { e1000_send_packet(s, tp->data, tp->size); - s->mac_reg[TPT]++; - s->mac_reg[GPTC]++; - n = s->mac_reg[TOTL]; - if ((s->mac_reg[TOTL] += s->tx.size) < n) - s->mac_reg[TOTH]++; + } + + inc_reg_if_not_full(s, TPT); + grow_8reg_if_not_full(s, TOTL, s->tx.size); + s->mac_reg[GPTC] = s->mac_reg[TPT]; + s->mac_reg[GOTCL] = s->mac_reg[TOTL]; + s->mac_reg[GOTCH] = s->mac_reg[TOTH]; } static void @@ -684,7 +768,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) struct e1000_tx *tp = &s->tx; s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE); - if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor + if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */ op = le32_to_cpu(xp->cmd_and_length); tp->ipcss = xp->lower_setup.ip_fields.ipcss; tp->ipcso = xp->lower_setup.ip_fields.ipcso; @@ -699,7 +783,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0; tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0; tp->tso_frames = 0; - if (tp->tucso == 0) { // this is probably wrong + if (tp->tucso == 0) { /* this is probably wrong */ DBGOUT(TXSUM, "TCP/UDP: cso 0!\n"); tp->tucso = tp->tucss + (tp->tcp ? 16 : 6); } @@ -723,7 +807,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) stw_be_p(tp->vlan_header + 2, le16_to_cpu(dp->upper.fields.special)); } - + addr = le64_to_cpu(dp->buffer_addr); if (tp->tse && tp->cptse) { msh = tp->hdr_len + tp->mss; @@ -836,9 +920,9 @@ start_xmit(E1000State *s) static int receive_filter(E1000State *s, const uint8_t *buf, int size) { - static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static const int mta_shift[] = {4, 3, 2, 0}; uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp; + int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1); if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) { uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14)); @@ -848,14 +932,19 @@ receive_filter(E1000State *s, const uint8_t *buf, int size) return 0; } - if (rctl & E1000_RCTL_UPE) // promiscuous + if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */ return 1; + } - if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE)) // promiscuous mcast + if (ismcast && (rctl & E1000_RCTL_MPE)) { /* promiscuous mcast */ + inc_reg_if_not_full(s, MPRC); return 1; + } - if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast)) + if (isbcast && (rctl & E1000_RCTL_BAM)) { /* broadcast enabled */ + inc_reg_if_not_full(s, BPRC); return 1; + } for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) { if (!(rp[1] & E1000_RAH_AV)) @@ -875,8 +964,10 @@ receive_filter(E1000State *s, const uint8_t *buf, int size) f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; - if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) + if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) { + inc_reg_if_not_full(s, MPRC); return 1; + } DBGOUT(RXFILTER, "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], @@ -965,6 +1056,8 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) size_t desc_offset; size_t desc_size; size_t total_size; + static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511, + PRC1023, PRC1522 }; if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) { return -1; @@ -978,6 +1071,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) if (size < sizeof(min_buf)) { iov_to_buf(iov, iovcnt, 0, min_buf, size); memset(&min_buf[size], 0, sizeof(min_buf) - size); + inc_reg_if_not_full(s, RUC); min_iov.iov_base = filter_buf = min_buf; min_iov.iov_len = size = sizeof(min_buf); iovcnt = 1; @@ -993,6 +1087,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) (size > MAXIMUM_ETHERNET_VLAN_SIZE && !(s->mac_reg[RCTL] & E1000_RCTL_LPE))) && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) { + inc_reg_if_not_full(s, ROC); return size; } @@ -1078,16 +1173,17 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) } } while (desc_offset < total_size); - s->mac_reg[GPRC]++; - s->mac_reg[TPR]++; + increase_size_stats(s, PRCregs, total_size); + inc_reg_if_not_full(s, TPR); + s->mac_reg[GPRC] = s->mac_reg[TPR]; /* TOR - Total Octets Received: * This register includes bytes received in a packet from the <Destination * Address> field through the <CRC> field, inclusively. + * Always include FCS length (4) in size. */ - n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4; - if (n < s->mac_reg[TORL]) - s->mac_reg[TORH]++; - s->mac_reg[TORL] = n; + grow_8reg_if_not_full(s, TORL, size+4); + s->mac_reg[GORCL] = s->mac_reg[TORL]; + s->mac_reg[GORCH] = s->mac_reg[TORH]; n = E1000_ICS_RXT0; if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH]) @@ -1119,6 +1215,30 @@ mac_readreg(E1000State *s, int index) } static uint32_t +mac_low4_read(E1000State *s, int index) +{ + return s->mac_reg[index] & 0xf; +} + +static uint32_t +mac_low11_read(E1000State *s, int index) +{ + return s->mac_reg[index] & 0x7ff; +} + +static uint32_t +mac_low13_read(E1000State *s, int index) +{ + return s->mac_reg[index] & 0x1fff; +} + +static uint32_t +mac_low16_read(E1000State *s, int index) +{ + return s->mac_reg[index] & 0xffff; +} + +static uint32_t mac_icr_read(E1000State *s, int index) { uint32_t ret = s->mac_reg[ICR]; @@ -1211,46 +1331,144 @@ set_ims(E1000State *s, int index, uint32_t val) set_ics(s, 0, 0); } -#define getreg(x) [x] = mac_readreg +#define getreg(x) [x] = mac_readreg static uint32_t (*macreg_readops[])(E1000State *, int) = { - getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL), - getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL), - getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS), - getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL), - getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS), - getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL), - getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV), - getreg(TADV), getreg(ITR), - - [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4, - [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4, - [ICR] = mac_icr_read, [EECD] = get_eecd, [EERD] = flash_eerd_read, - [CRCERRS ... MPC] = &mac_readreg, - [RA ... RA+31] = &mac_readreg, - [MTA ... MTA+127] = &mac_readreg, + getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL), + getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL), + getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS), + getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL), + getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS), + getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL), + getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV), + getreg(TADV), getreg(ITR), getreg(FCRUC), getreg(IPAV), + getreg(WUC), getreg(WUS), getreg(SCC), getreg(ECOL), + getreg(MCC), getreg(LATECOL), getreg(COLC), getreg(DC), + getreg(TNCRS), getreg(SEC), getreg(CEXTERR), getreg(RLEC), + getreg(XONRXC), getreg(XONTXC), getreg(XOFFRXC), getreg(XOFFTXC), + getreg(RFC), getreg(RJC), getreg(RNBC), getreg(TSCTFC), + getreg(MGTPRC), getreg(MGTPDC), getreg(MGTPTC), getreg(GORCL), + getreg(GOTCL), + + [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, + [GOTCH] = mac_read_clr8, [GORCH] = mac_read_clr8, + [PRC64] = mac_read_clr4, [PRC127] = mac_read_clr4, + [PRC255] = mac_read_clr4, [PRC511] = mac_read_clr4, + [PRC1023] = mac_read_clr4, [PRC1522] = mac_read_clr4, + [PTC64] = mac_read_clr4, [PTC127] = mac_read_clr4, + [PTC255] = mac_read_clr4, [PTC511] = mac_read_clr4, + [PTC1023] = mac_read_clr4, [PTC1522] = mac_read_clr4, + [GPRC] = mac_read_clr4, [GPTC] = mac_read_clr4, + [TPT] = mac_read_clr4, [TPR] = mac_read_clr4, + [RUC] = mac_read_clr4, [ROC] = mac_read_clr4, + [BPRC] = mac_read_clr4, [MPRC] = mac_read_clr4, + [TSCTC] = mac_read_clr4, [BPTC] = mac_read_clr4, + [MPTC] = mac_read_clr4, + [ICR] = mac_icr_read, [EECD] = get_eecd, + [EERD] = flash_eerd_read, + [RDFH] = mac_low13_read, [RDFT] = mac_low13_read, + [RDFHS] = mac_low13_read, [RDFTS] = mac_low13_read, + [RDFPC] = mac_low13_read, + [TDFH] = mac_low11_read, [TDFT] = mac_low11_read, + [TDFHS] = mac_low13_read, [TDFTS] = mac_low13_read, + [TDFPC] = mac_low13_read, + [AIT] = mac_low16_read, + + [CRCERRS ... MPC] = &mac_readreg, + [IP6AT ... IP6AT+3] = &mac_readreg, [IP4AT ... IP4AT+6] = &mac_readreg, + [FFLT ... FFLT+6] = &mac_low11_read, + [RA ... RA+31] = &mac_readreg, + [WUPM ... WUPM+31] = &mac_readreg, + [MTA ... MTA+127] = &mac_readreg, [VFTA ... VFTA+127] = &mac_readreg, + [FFMT ... FFMT+254] = &mac_low4_read, + [FFVT ... FFVT+254] = &mac_readreg, + [PBM ... PBM+16383] = &mac_readreg, }; enum { NREADOPS = ARRAY_SIZE(macreg_readops) }; -#define putreg(x) [x] = mac_writereg +#define putreg(x) [x] = mac_writereg static void (*macreg_writeops[])(E1000State *, int, uint32_t) = { - putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC), - putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH), - putreg(RDBAL), putreg(LEDCTL), putreg(VET), - [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl, - [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics, - [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt, - [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr, - [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl, - [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit, - [ITR] = set_16bit, - [RA ... RA+31] = &mac_writereg, - [MTA ... MTA+127] = &mac_writereg, + putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC), + putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH), + putreg(RDBAL), putreg(LEDCTL), putreg(VET), putreg(FCRUC), + putreg(TDFH), putreg(TDFT), putreg(TDFHS), putreg(TDFTS), + putreg(TDFPC), putreg(RDFH), putreg(RDFT), putreg(RDFHS), + putreg(RDFTS), putreg(RDFPC), putreg(IPAV), putreg(WUC), + putreg(WUS), putreg(AIT), + + [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl, + [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics, + [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt, + [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr, + [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl, + [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit, + [ITR] = set_16bit, + + [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg, + [FFLT ... FFLT+6] = &mac_writereg, + [RA ... RA+31] = &mac_writereg, + [WUPM ... WUPM+31] = &mac_writereg, + [MTA ... MTA+127] = &mac_writereg, [VFTA ... VFTA+127] = &mac_writereg, + [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg, + [PBM ... PBM+16383] = &mac_writereg, }; enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) }; +enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 }; + +#define markflag(x) ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED) +/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p] + * f - flag bits (up to 6 possible flags) + * n - flag needed + * p - partially implenented */ +static const uint8_t mac_reg_access[0x8000] = { + [RDTR] = markflag(MIT), [TADV] = markflag(MIT), + [RADV] = markflag(MIT), [ITR] = markflag(MIT), + + [IPAV] = markflag(MAC), [WUC] = markflag(MAC), + [IP6AT] = markflag(MAC), [IP4AT] = markflag(MAC), + [FFVT] = markflag(MAC), [WUPM] = markflag(MAC), + [ECOL] = markflag(MAC), [MCC] = markflag(MAC), + [DC] = markflag(MAC), [TNCRS] = markflag(MAC), + [RLEC] = markflag(MAC), [XONRXC] = markflag(MAC), + [XOFFTXC] = markflag(MAC), [RFC] = markflag(MAC), + [TSCTFC] = markflag(MAC), [MGTPRC] = markflag(MAC), + [WUS] = markflag(MAC), [AIT] = markflag(MAC), + [FFLT] = markflag(MAC), [FFMT] = markflag(MAC), + [SCC] = markflag(MAC), [FCRUC] = markflag(MAC), + [LATECOL] = markflag(MAC), [COLC] = markflag(MAC), + [SEC] = markflag(MAC), [CEXTERR] = markflag(MAC), + [XONTXC] = markflag(MAC), [XOFFRXC] = markflag(MAC), + [RJC] = markflag(MAC), [RNBC] = markflag(MAC), + [MGTPDC] = markflag(MAC), [MGTPTC] = markflag(MAC), + [RUC] = markflag(MAC), [ROC] = markflag(MAC), + [GORCL] = markflag(MAC), [GORCH] = markflag(MAC), + [GOTCL] = markflag(MAC), [GOTCH] = markflag(MAC), + [BPRC] = markflag(MAC), [MPRC] = markflag(MAC), + [TSCTC] = markflag(MAC), [PRC64] = markflag(MAC), + [PRC127] = markflag(MAC), [PRC255] = markflag(MAC), + [PRC511] = markflag(MAC), [PRC1023] = markflag(MAC), + [PRC1522] = markflag(MAC), [PTC64] = markflag(MAC), + [PTC127] = markflag(MAC), [PTC255] = markflag(MAC), + [PTC511] = markflag(MAC), [PTC1023] = markflag(MAC), + [PTC1522] = markflag(MAC), [MPTC] = markflag(MAC), + [BPTC] = markflag(MAC), + + [TDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [TDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [RDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [RDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [PBM] = markflag(MAC) | MAC_ACCESS_PARTIAL, +}; + static void e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) @@ -1259,9 +1477,20 @@ e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned int index = (addr & 0x1ffff) >> 2; if (index < NWRITEOPS && macreg_writeops[index]) { - macreg_writeops[index](s, index, val); + if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED) + || (s->compat_flags & (mac_reg_access[index] >> 2))) { + if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) { + DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. " + "It is not fully implemented.\n", index<<2); + } + macreg_writeops[index](s, index, val); + } else { /* "flag needed" bit is set, but the flag is not active */ + DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n", + index<<2); + } } else if (index < NREADOPS && macreg_readops[index]) { - DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val); + DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", + index<<2, val); } else { DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n", index<<2, val); @@ -1274,11 +1503,21 @@ e1000_mmio_read(void *opaque, hwaddr addr, unsigned size) E1000State *s = opaque; unsigned int index = (addr & 0x1ffff) >> 2; - if (index < NREADOPS && macreg_readops[index]) - { - return macreg_readops[index](s, index); + if (index < NREADOPS && macreg_readops[index]) { + if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED) + || (s->compat_flags & (mac_reg_access[index] >> 2))) { + if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) { + DBGOUT(GENERAL, "Reading register at offset: 0x%08x. " + "It is not fully implemented.\n", index<<2); + } + return macreg_readops[index](s, index); + } else { /* "flag needed" bit is set, but the flag is not active */ + DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n", + index<<2); + } + } else { + DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2); } - DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2); return 0; } @@ -1345,7 +1584,7 @@ static int e1000_post_load(void *opaque, int version_id) E1000State *s = opaque; NetClientState *nc = qemu_get_queue(s->nic); - if (!(s->compat_flags & E1000_FLAG_MIT)) { + if (!chkflag(MIT)) { s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] = s->mac_reg[TADV] = 0; s->mit_irq_level = false; @@ -1372,7 +1611,14 @@ static bool e1000_mit_state_needed(void *opaque) { E1000State *s = opaque; - return s->compat_flags & E1000_FLAG_MIT; + return chkflag(MIT); +} + +static bool e1000_full_mac_needed(void *opaque) +{ + E1000State *s = opaque; + + return chkflag(MAC); } static const VMStateDescription vmstate_e1000_mit_state = { @@ -1390,6 +1636,17 @@ static const VMStateDescription vmstate_e1000_mit_state = { } }; +static const VMStateDescription vmstate_e1000_full_mac_state = { + .name = "e1000/full_mac_state", + .version_id = 1, + .minimum_version_id = 1, + .needed = e1000_full_mac_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_e1000 = { .name = "e1000", .version_id = 2, @@ -1469,6 +1726,7 @@ static const VMStateDescription vmstate_e1000 = { }, .subsections = (const VMStateDescription*[]) { &vmstate_e1000_mit_state, + &vmstate_e1000_full_mac_state, NULL } }; @@ -1601,6 +1859,8 @@ static Property e1000_properties[] = { compat_flags, E1000_FLAG_AUTONEG_BIT, true), DEFINE_PROP_BIT("mitigation", E1000State, compat_flags, E1000_FLAG_MIT_BIT, true), + DEFINE_PROP_BIT("extra_mac_registers", E1000State, + compat_flags, E1000_FLAG_MAC_BIT, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h index 60b96aaf13..1c40244ab5 100644 --- a/hw/net/e1000_regs.h +++ b/hw/net/e1000_regs.h @@ -158,7 +158,8 @@ #define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ #define FEXTNVM_SW_CONFIG 0x0001 #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ -#define E1000_PBS 0x01008 /* Packet Buffer Size */ +#define E1000_PBM 0x10000 /* Packet Buffer Memory - RW */ +#define E1000_PBS 0x01008 /* Packet Buffer Size - RW */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ #define E1000_FLASH_UPDATES 1000 #define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ @@ -191,6 +192,11 @@ #define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ #define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */ #define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ +#define E1000_RDFH 0x02410 /* Receive Data FIFO Head Register - RW */ +#define E1000_RDFT 0x02418 /* Receive Data FIFO Tail Register - RW */ +#define E1000_RDFHS 0x02420 /* Receive Data FIFO Head Saved Register - RW */ +#define E1000_RDFTS 0x02428 /* Receive Data FIFO Tail Saved Register - RW */ +#define E1000_RDFPC 0x02430 /* Receive Data FIFO Packet Count - RW */ #define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ #define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ #define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index 60333b7fcc..685a4781b9 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -774,6 +774,11 @@ static void tx_command(EEPRO100State *s) #if 0 uint16_t tx_buffer_el = lduw_le_pci_dma(&s->dev, tbd_address + 6); #endif + if (tx_buffer_size == 0) { + /* Prevent an endless loop. */ + logout("loop in %s:%u\n", __FILE__, __LINE__); + break; + } tbd_address += 8; TRACE(RXTX, logout ("TBD (simplified mode): buffer address 0x%08x, size 0x%04x\n", @@ -855,6 +860,10 @@ static void set_multicast_list(EEPRO100State *s) static void action_command(EEPRO100State *s) { + /* The loop below won't stop if it gets special handcrafted data. + Therefore we limit the number of iterations. */ + unsigned max_loop_count = 16; + for (;;) { bool bit_el; bool bit_s; @@ -870,6 +879,13 @@ static void action_command(EEPRO100State *s) #if 0 bool bit_sf = ((s->tx.command & COMMAND_SF) != 0); #endif + + if (max_loop_count-- == 0) { + /* Prevent an endless loop. */ + logout("loop in %s:%u\n", __FILE__, __LINE__); + break; + } + s->cu_offset = s->tx.link; TRACE(OTHER, logout("val=(cu start), status=0x%04x, command=0x%04x, link=0x%08x\n", diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index bb6fdc364d..c57f1a661a 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -101,8 +101,7 @@ RockerSwitch *qmp_query_rocker(const char *name, Error **errp) r = rocker_find(name); if (!r) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, - "rocker %s not found", name); + error_setg(errp, "rocker %s not found", name); return NULL; } @@ -122,8 +121,7 @@ RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp) r = rocker_find(name); if (!r) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, - "rocker %s not found", name); + error_setg(errp, "rocker %s not found", name); return NULL; } diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index 1ad2791965..3cf1d61fe4 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -2462,15 +2462,13 @@ RockerOfDpaFlowList *qmp_query_rocker_of_dpa_flows(const char *name, r = rocker_find(name); if (!r) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, - "rocker %s not found", name); + error_setg(errp, "rocker %s not found", name); return NULL; } w = rocker_get_world(r, ROCKER_WORLD_TYPE_OF_DPA); if (!w) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, - "rocker %s doesn't have OF-DPA world", name); + error_setg(errp, "rocker %s doesn't have OF-DPA world", name); return NULL; } @@ -2597,15 +2595,13 @@ RockerOfDpaGroupList *qmp_query_rocker_of_dpa_groups(const char *name, r = rocker_find(name); if (!r) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, - "rocker %s not found", name); + error_setg(errp, "rocker %s not found", name); return NULL; } w = rocker_get_world(r, ROCKER_WORLD_TYPE_OF_DPA); if (!w) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, - "rocker %s doesn't have OF-DPA world", name); + error_setg(errp, "rocker %s doesn't have OF-DPA world", name); return NULL; } diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index d91b7b155e..318c3e6ad2 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -77,14 +77,8 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MRG_RXBUF, - VIRTIO_NET_F_STATUS, - VIRTIO_NET_F_CTRL_VQ, - VIRTIO_NET_F_CTRL_RX, - VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_CTRL_RX_EXTRA, - VIRTIO_NET_F_CTRL_MAC_ADDR, - VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, + /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, @@ -292,12 +286,6 @@ static void vhost_net_stop_one(struct vhost_net *net, int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); assert(r >= 0); } - } else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) { - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = vhost_ops->vhost_reset_device(&net->dev); - assert(r >= 0); - } } if (net->nc->info->poll) { net->nc->info->poll(net->nc, true); diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 7b2fbf9598..715208b22a 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -34,6 +34,7 @@ #include "sysemu/sysemu.h" #include "hw/i386/ioapic.h" #include "qapi/visitor.h" +#include "qemu/error-report.h" /* * I440FX chipset data sheet. @@ -301,6 +302,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) static void i440fx_realize(PCIDevice *dev, Error **errp) { dev->config[I440FX_SMRAM] = 0x02; + + if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { + error_report("warning: i440fx doesn't support emulated iommu"); + } } PCIBus *i440fx_init(const char *host_type, const char *pci_type, diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index c81507d710..1fb470758b 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -506,7 +506,7 @@ static void mch_realize(PCIDevice *d, Error **errp) PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } /* Intel IOMMU (VT-d) */ - if (machine_iommu(current_machine)) { + if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { mch_init_dmar(mch); } } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index b0bf54061f..168b9cc56b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -847,6 +847,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, PCIConfigWriteFunc *config_write = pc->config_write; Error *local_err = NULL; AddressSpace *dma_as; + DeviceState *dev = DEVICE(pci_dev); + + pci_dev->bus = bus; if (devfn < 0) { for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices); @@ -864,9 +867,17 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, PCI_SLOT(devfn), PCI_FUNC(devfn), name, bus->devices[devfn]->name); return NULL; + } else if (dev->hotplugged && + pci_get_function_0(pci_dev)) { + error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s," + " new func %s cannot be exposed to guest.", + PCI_SLOT(devfn), + bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name, + name); + + return NULL; } - pci_dev->bus = bus; pci_dev->devfn = devfn; dma_as = pci_device_iommu_address_space(pci_dev); @@ -2454,6 +2465,33 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range) pci_for_each_device_under_bus(bus, pci_dev_get_w64, range); } +static bool pcie_has_upstream_port(PCIDevice *dev) +{ + PCIDevice *parent_dev = pci_bridge_get_device(dev->bus); + + /* Device associated with an upstream port. + * As there are several types of these, it's easier to check the + * parent device: upstream ports are always connected to + * root or downstream ports. + */ + return parent_dev && + pci_is_express(parent_dev) && + parent_dev->exp.exp_cap && + (pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_ROOT_PORT || + pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_DOWNSTREAM); +} + +PCIDevice *pci_get_function_0(PCIDevice *pci_dev) +{ + if(pcie_has_upstream_port(pci_dev)) { + /* With an upstream PCIe port, we only support 1 device at slot 0 */ + return pci_dev->bus->devices[0]; + } else { + /* Other bus types might support multiple devices at slots 0-31 */ + return pci_dev->bus->devices[PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 0)]; + } +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 3e26f9256c..49f59a5dbc 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -20,6 +20,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" +#include "hw/pci/pci_bus.h" #include "trace.h" /* debug PCI */ @@ -52,6 +53,13 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, uint32_t limit, uint32_t val, uint32_t len) { assert(len <= 4); + /* non-zero functions are only exposed when function 0 is present, + * allowing direct removal of unexposed functions. + */ + if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + return; + } + trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn), addr, val); pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr)); @@ -63,6 +71,13 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, uint32_t ret; assert(len <= 4); + /* non-zero functions are only exposed when function 0 is present, + * allowing direct removal of unexposed functions. + */ + if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + return ~0x0; + } + ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr)); trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn), addr, ret); diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 6e28985bd1..0eab29d534 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -249,25 +249,43 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, return; } - /* TODO: multifunction hot-plug. - * Right now, only a device of function = 0 is allowed to be - * hot plugged/unplugged. + /* To enable multifunction hot-plug, we just ensure the function + * 0 added last. When function 0 is added, we set the sltsta and + * inform OS via event notification. */ - assert(PCI_FUNC(pci_dev->devfn) == 0); + if (pci_get_function_0(pci_dev)) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), + PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + } +} - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), - PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); +static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + object_unparent(OBJECT(dev)); } void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { uint8_t *exp_cap; + PCIDevice *pci_dev = PCI_DEVICE(dev); + PCIBus *bus = pci_dev->bus; pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); + /* In case user cancel the operation of multi-function hot-add, + * remove the function that is unexposed to guest individually, + * without interaction with guest. + */ + if (pci_dev->devfn && + !bus->devices[0]) { + pcie_unplug_device(bus, pci_dev, NULL); + + return; + } + pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev)); } @@ -378,11 +396,6 @@ void pcie_cap_slot_reset(PCIDevice *dev) hotplug_event_update_event_status(dev); } -static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) -{ - object_unparent(OBJECT(dev)); -} - void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) { @@ -413,13 +426,13 @@ void pcie_cap_slot_write_config(PCIDevice *dev, */ if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && ((val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF)) { - PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - pcie_unplug_device, NULL); + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_unplug_device, NULL); - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC); } diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h index 8bdba30c1e..e375ed2b2b 100644 --- a/hw/ppc/mac.h +++ b/hw/ppc/mac.h @@ -103,6 +103,9 @@ typedef struct CUDAState { uint8_t last_b; uint8_t last_acr; + /* MacOS 9 is racy and requires a delay upon setting the SR_INT bit */ + QEMUTimer *sr_delay_timer; + int data_in_size; int data_in_index; int data_out_index; diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 66d016c6fa..1b9a573cae 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -371,12 +371,13 @@ static void ppc_core99_init(MachineState *machine) /* 970 gets a U3 bus */ pci_bus = pci_pmac_u3_init(pic, get_system_memory(), get_system_io()); machine_arch = ARCH_MAC99_U3; - machine->usb |= defaults_enabled() && !machine->usb_disabled; } else { pci_bus = pci_pmac_init(pic, get_system_memory(), get_system_io()); machine_arch = ARCH_MAC99; } + machine->usb |= defaults_enabled() && !machine->usb_disabled; + /* Timebase Frequency */ if (kvm_enabled()) { tbfreq = kvmppc_get_tbfreq(); diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c index ec87587d6d..31bc186427 100644 --- a/hw/ppc/ppc405_boards.c +++ b/hw/ppc/ppc405_boards.c @@ -408,7 +408,7 @@ struct taihu_cpld_t { uint8_t reg1; }; -static uint32_t taihu_cpld_readb (void *opaque, hwaddr addr) +static uint64_t taihu_cpld_read(void *opaque, hwaddr addr, unsigned size) { taihu_cpld_t *cpld; uint32_t ret; @@ -429,8 +429,8 @@ static uint32_t taihu_cpld_readb (void *opaque, hwaddr addr) return ret; } -static void taihu_cpld_writeb (void *opaque, - hwaddr addr, uint32_t value) +static void taihu_cpld_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) { taihu_cpld_t *cpld; @@ -447,48 +447,12 @@ static void taihu_cpld_writeb (void *opaque, } } -static uint32_t taihu_cpld_readw (void *opaque, hwaddr addr) -{ - uint32_t ret; - - ret = taihu_cpld_readb(opaque, addr) << 8; - ret |= taihu_cpld_readb(opaque, addr + 1); - - return ret; -} - -static void taihu_cpld_writew (void *opaque, - hwaddr addr, uint32_t value) -{ - taihu_cpld_writeb(opaque, addr, (value >> 8) & 0xFF); - taihu_cpld_writeb(opaque, addr + 1, value & 0xFF); -} - -static uint32_t taihu_cpld_readl (void *opaque, hwaddr addr) -{ - uint32_t ret; - - ret = taihu_cpld_readb(opaque, addr) << 24; - ret |= taihu_cpld_readb(opaque, addr + 1) << 16; - ret |= taihu_cpld_readb(opaque, addr + 2) << 8; - ret |= taihu_cpld_readb(opaque, addr + 3); - - return ret; -} - -static void taihu_cpld_writel (void *opaque, - hwaddr addr, uint32_t value) -{ - taihu_cpld_writel(opaque, addr, (value >> 24) & 0xFF); - taihu_cpld_writel(opaque, addr + 1, (value >> 16) & 0xFF); - taihu_cpld_writel(opaque, addr + 2, (value >> 8) & 0xFF); - taihu_cpld_writeb(opaque, addr + 3, value & 0xFF); -} - static const MemoryRegionOps taihu_cpld_ops = { - .old_mmio = { - .read = { taihu_cpld_readb, taihu_cpld_readw, taihu_cpld_readl, }, - .write = { taihu_cpld_writeb, taihu_cpld_writew, taihu_cpld_writel, }, + .read = taihu_cpld_read, + .write = taihu_cpld_write, + .impl = { + .min_access_size = 1, + .max_access_size = 1, }, .endianness = DEVICE_NATIVE_ENDIAN, }; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e1202cec9f..6bfb908da7 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -125,6 +125,7 @@ static XICSState *xics_system_init(MachineState *machine, error_report("kernel_irqchip requested but unavailable: %s", error_get_pretty(err)); } + error_free(err); } if (!icp) { @@ -1021,9 +1022,19 @@ static void spapr_alloc_htab(sPAPRMachineState *spapr) * RAM */ shift = kvmppc_reset_htab(spapr->htab_shift); - - if (shift > 0) { - /* Kernel handles htab, we don't need to allocate one */ + if (shift < 0) { + /* + * For HV KVM, host kernel will return -ENOMEM when requested + * HTAB size can't be allocated. + */ + error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem"); + } else if (shift > 0) { + /* + * Kernel handles htab, we don't need to allocate one + * + * Older kernels can fall back to lower HTAB shift values, + * but we don't allow booting of such guests. + */ if (shift != spapr->htab_shift) { error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem"); } @@ -1055,7 +1066,9 @@ static void spapr_reset_htab(sPAPRMachineState *spapr) int index; shift = kvmppc_reset_htab(spapr->htab_shift); - if (shift > 0) { + if (shift < 0) { + error_setg(&error_abort, "Failed to reset HTAB"); + } else if (shift > 0) { if (shift != spapr->htab_shift) { error_setg(&error_abort, "Requested HTAB allocation failed during reset"); } @@ -1588,7 +1601,7 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) static SaveVMHandlers savevm_htab_handlers = { .save_live_setup = htab_save_setup, .save_live_iterate = htab_save_iterate, - .save_live_complete = htab_save_complete, + .save_live_complete_precopy = htab_save_complete, .load_state = htab_load, }; @@ -2157,7 +2170,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, false, &local_err); + pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); if (local_err) { goto out; } @@ -2285,7 +2298,11 @@ static const TypeInfo spapr_machine_info = { }, }; +#define SPAPR_COMPAT_2_4 \ + HW_COMPAT_2_4 + #define SPAPR_COMPAT_2_3 \ + SPAPR_COMPAT_2_4 \ HW_COMPAT_2_3 \ {\ .driver = "spapr-pci-host-bridge",\ @@ -2399,11 +2416,14 @@ static const TypeInfo spapr_machine_2_3_info = { static void spapr_machine_2_4_class_init(ObjectClass *oc, void *data) { + static GlobalProperty compat_props[] = { + SPAPR_COMPAT_2_4 + { /* end of list */ } + }; MachineClass *mc = MACHINE_CLASS(oc); mc->desc = "pSeries Logical Partition (PAPR compliant) v2.4"; - mc->alias = "pseries"; - mc->is_default = 0; + mc->compat_props = compat_props; } static const TypeInfo spapr_machine_2_4_info = { diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 5d6ea7ce41..8be62c349b 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -254,10 +254,16 @@ static void prop_get_fdt(Object *obj, Visitor *v, void *opaque, const char *name, Error **errp) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + Error *err = NULL; int fdt_offset_next, fdt_offset, fdt_depth; void *fdt; if (!drc->fdt) { + visit_start_struct(v, NULL, NULL, name, 0, &err); + if (!err) { + visit_end_struct(v, &err); + } + error_propagate(errp, err); return; } @@ -276,24 +282,43 @@ static void prop_get_fdt(Object *obj, Visitor *v, void *opaque, case FDT_BEGIN_NODE: fdt_depth++; name = fdt_get_name(fdt, fdt_offset, &name_len); - visit_start_struct(v, NULL, NULL, name, 0, NULL); + visit_start_struct(v, NULL, NULL, name, 0, &err); + if (err) { + error_propagate(errp, err); + return; + } break; case FDT_END_NODE: /* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */ g_assert(fdt_depth > 0); - visit_end_struct(v, NULL); + visit_end_struct(v, &err); + if (err) { + error_propagate(errp, err); + return; + } fdt_depth--; break; case FDT_PROP: { int i; prop = fdt_get_property_by_offset(fdt, fdt_offset, &prop_len); name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); - visit_start_list(v, name, NULL); + visit_start_list(v, name, &err); + if (err) { + error_propagate(errp, err); + return; + } for (i = 0; i < prop_len; i++) { - visit_type_uint8(v, (uint8_t *)&prop->data[i], NULL, NULL); - + visit_type_uint8(v, (uint8_t *)&prop->data[i], NULL, &err); + if (err) { + error_propagate(errp, err); + return; + } + } + visit_end_list(v, &err); + if (err) { + error_propagate(errp, err); + return; } - visit_end_list(v, NULL); break; } default: @@ -574,6 +599,10 @@ static void spapr_dr_connector_class_init(ObjectClass *k, void *data) drck->attach = attach; drck->detach = detach; drck->release_pending = release_pending; + /* + * Reason: it crashes FIXME find and document the real reason + */ + dk->cannot_instantiate_with_device_add_yet = true; } static const TypeInfo spapr_dr_connector_info = { @@ -657,6 +686,7 @@ int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, { Object *root_container; ObjectProperty *prop; + ObjectPropertyIterator *iter; uint32_t drc_count = 0; GArray *drc_indexes, *drc_power_domains; GString *drc_names, *drc_types; @@ -680,7 +710,8 @@ int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, */ root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); - QTAILQ_FOREACH(prop, &root_container->properties, node) { + iter = object_property_iter_init(root_container); + while ((prop = object_property_iter_next(iter))) { Object *obj; sPAPRDRConnector *drc; sPAPRDRConnectorClass *drck; @@ -721,6 +752,7 @@ int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, spapr_drc_get_type_str(drc->type)); drc_types = g_string_insert_len(drc_types, -1, "\0", 1); } + object_property_iter_free(iter); /* now write the drc count into the space we reserved at the * beginning of the arrays previously diff --git a/hw/s390x/css.c b/hw/s390x/css.c index c033612889..19851ce6a9 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -892,8 +892,14 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) /* If a unit check is pending, copy sense data. */ if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) && (p->chars & PMCW_CHARS_MASK_CSENSE)) { + int i; + irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; + /* Attention: sense_data is already BE! */ memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data)); + for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) { + irb.ecw[i] = be32_to_cpu(irb.ecw[i]); + } irb.esw[1] = 0x01000000 | (sizeof(sch->sense_data) << 8); } } diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 5f7f34900a..b91fcc6e79 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -15,7 +15,6 @@ #include "cpu.h" #include "elf.h" #include "hw/loader.h" -#include "hw/sysbus.h" #include "hw/s390x/virtio-ccw.h" #include "hw/s390x/css.h" #include "ipl.h" @@ -29,44 +28,6 @@ #define ZIPL_IMAGE_START 0x009000UL #define IPL_PSW_MASK (PSW_MASK_32 | PSW_MASK_64) -#define TYPE_S390_IPL "s390-ipl" -#define S390_IPL(obj) \ - OBJECT_CHECK(S390IPLState, (obj), TYPE_S390_IPL) -#if 0 -#define S390_IPL_CLASS(klass) \ - OBJECT_CLASS_CHECK(S390IPLState, (klass), TYPE_S390_IPL) -#define S390_IPL_GET_CLASS(obj) \ - OBJECT_GET_CLASS(S390IPLState, (obj), TYPE_S390_IPL) -#endif - -typedef struct S390IPLClass { - /*< private >*/ - SysBusDeviceClass parent_class; - /*< public >*/ - - void (*parent_reset) (SysBusDevice *dev); -} S390IPLClass; - -typedef struct S390IPLState { - /*< private >*/ - SysBusDevice parent_obj; - uint64_t start_addr; - uint64_t bios_start_addr; - bool enforce_bios; - IplParameterBlock iplb; - bool iplb_valid; - bool reipl_requested; - - /*< public >*/ - char *kernel; - char *initrd; - char *cmdline; - char *firmware; - uint8_t cssid; - uint8_t ssid; - uint16_t devno; -} S390IPLState; - static const VMStateDescription vmstate_iplb = { .name = "ipl/iplb", .version_id = 0, @@ -110,11 +71,12 @@ static uint64_t bios_translate_addr(void *opaque, uint64_t srcaddr) return srcaddr + dstaddr; } -static int s390_ipl_init(SysBusDevice *dev) +static void s390_ipl_realize(DeviceState *dev, Error **errp) { S390IPLState *ipl = S390_IPL(dev); uint64_t pentry = KERN_IMAGE_START; int kernel_size; + Error *l_err = NULL; int bios_size; char *bios_filename; @@ -132,7 +94,8 @@ static int s390_ipl_init(SysBusDevice *dev) bios_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); if (bios_filename == NULL) { - hw_error("could not find stage1 bootloader\n"); + error_setg(&l_err, "could not find stage1 bootloader\n"); + goto error; } bios_size = load_elf(bios_filename, bios_translate_addr, &fwbase, @@ -150,7 +113,8 @@ static int s390_ipl_init(SysBusDevice *dev) g_free(bios_filename); if (bios_size == -1) { - hw_error("could not load bootloader '%s'\n", bios_name); + error_setg(&l_err, "could not load bootloader '%s'\n", bios_name); + goto error; } /* default boot target is the bios */ @@ -164,8 +128,8 @@ static int s390_ipl_init(SysBusDevice *dev) kernel_size = load_image_targphys(ipl->kernel, 0, ram_size); } if (kernel_size < 0) { - fprintf(stderr, "could not load kernel '%s'\n", ipl->kernel); - return -1; + error_setg(&l_err, "could not load kernel '%s'\n", ipl->kernel); + goto error; } /* * Is it a Linux kernel (starting at 0x10000)? If yes, we fill in the @@ -192,9 +156,8 @@ static int s390_ipl_init(SysBusDevice *dev) initrd_size = load_image_targphys(ipl->initrd, initrd_offset, ram_size - initrd_offset); if (initrd_size == -1) { - fprintf(stderr, "qemu: could not load initrd '%s'\n", - ipl->initrd); - exit(1); + error_setg(&l_err, "could not load initrd '%s'\n", ipl->initrd); + goto error; } /* @@ -205,7 +168,9 @@ static int s390_ipl_init(SysBusDevice *dev) stq_p(rom_ptr(INITRD_PARM_SIZE), initrd_size); } } - return 0; + qemu_register_reset(qdev_reset_all_fn, dev); +error: + error_propagate(errp, l_err); } static Property s390_ipl_properties[] = { @@ -308,9 +273,8 @@ static void s390_ipl_reset(DeviceState *dev) static void s390_ipl_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = s390_ipl_init; + dc->realize = s390_ipl_realize; dc->props = s390_ipl_properties; dc->reset = s390_ipl_reset; dc->vmsd = &vmstate_ipl; @@ -319,8 +283,8 @@ static void s390_ipl_class_init(ObjectClass *klass, void *data) static const TypeInfo s390_ipl_info = { .class_init = s390_ipl_class_init, - .parent = TYPE_SYS_BUS_DEVICE, - .name = "s390-ipl", + .parent = TYPE_DEVICE, + .name = TYPE_S390_IPL, .instance_size = sizeof(S390IPLState), }; diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h index 7f2b4033d4..6b48ed7b93 100644 --- a/hw/s390x/ipl.h +++ b/hw/s390x/ipl.h @@ -12,6 +12,7 @@ #ifndef HW_S390_IPL_H #define HW_S390_IPL_H +#include "hw/qdev.h" #include "cpu.h" typedef struct IplParameterBlock { @@ -25,4 +26,28 @@ void s390_ipl_prepare_cpu(S390CPU *cpu); IplParameterBlock *s390_ipl_get_iplb(void); void s390_reipl_request(void); +#define TYPE_S390_IPL "s390-ipl" +#define S390_IPL(obj) OBJECT_CHECK(S390IPLState, (obj), TYPE_S390_IPL) + +struct S390IPLState { + /*< private >*/ + DeviceState parent_obj; + uint64_t start_addr; + uint64_t bios_start_addr; + bool enforce_bios; + IplParameterBlock iplb; + bool iplb_valid; + bool reipl_requested; + + /*< public >*/ + char *kernel; + char *initrd; + char *cmdline; + char *firmware; + uint8_t cssid; + uint8_t ssid; + uint16_t devno; +}; +typedef struct S390IPLState S390IPLState; + #endif diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 560b66a501..98c726cfcd 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -308,9 +308,8 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr, { uint64_t pte; uint32_t flags; - S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr); - S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev) - ->qbus.parent); + S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, iommu_mr); + S390pciState *s; IOMMUTLBEntry ret = { .target_as = &address_space_memory, .iova = 0, @@ -319,8 +318,13 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr, .perm = IOMMU_NONE, }; + if (!pbdev->configured || !pbdev->pdev) { + return ret; + } + DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr); + s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent); /* s390 does not have an APIC mapped to main storage so we use * a separate AddressSpace only for msix notifications */ @@ -450,14 +454,32 @@ static const MemoryRegionOps s390_msi_ctrl_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +void s390_pcihost_iommu_configure(S390PCIBusDevice *pbdev, bool enable) +{ + pbdev->configured = false; + + if (enable) { + uint64_t size = pbdev->pal - pbdev->pba + 1; + memory_region_init_iommu(&pbdev->iommu_mr, OBJECT(&pbdev->mr), + &s390_iommu_ops, "iommu-s390", size); + memory_region_add_subregion(&pbdev->mr, pbdev->pba, &pbdev->iommu_mr); + } else { + memory_region_del_subregion(&pbdev->mr, &pbdev->iommu_mr); + } + + pbdev->configured = true; +} + static void s390_pcihost_init_as(S390pciState *s) { int i; + S390PCIBusDevice *pbdev; for (i = 0; i < PCI_SLOT_MAX; i++) { - memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s), - &s390_iommu_ops, "iommu-s390", UINT64_MAX); - address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci"); + pbdev = &s->pbdev[i]; + memory_region_init(&pbdev->mr, OBJECT(s), + "iommu-root-s390", UINT64_MAX); + address_space_init(&pbdev->as, &pbdev->mr, "iommu-pci"); } memory_region_init_io(&s->msix_notify_mr, OBJECT(s), diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h index 464a92eedf..80345dacb1 100644 --- a/hw/s390x/s390-pci-bus.h +++ b/hw/s390x/s390-pci-bus.h @@ -231,6 +231,7 @@ typedef struct S390PCIBusDevice { AdapterRoutes routes; AddressSpace as; MemoryRegion mr; + MemoryRegion iommu_mr; } S390PCIBusDevice; typedef struct S390pciState { @@ -244,6 +245,7 @@ typedef struct S390pciState { int chsc_sei_nt2_get_event(void *res); int chsc_sei_nt2_have_event(void); void s390_pci_sclp_configure(int configure, SCCB *sccb); +void s390_pcihost_iommu_configure(S390PCIBusDevice *pbdev, bool enable); S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx); S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh); S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid); diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index f9151a9afb..8c1dc82b1f 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -528,7 +528,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) goto out; } - mr = pci_device_iommu_address_space(pbdev->pdev)->root; + mr = &pbdev->iommu_mr; while (start < end) { entry = mr->iommu_ops->translate(mr, start, 0); @@ -689,6 +689,9 @@ static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) pbdev->pba = pba; pbdev->pal = pal; pbdev->g_iota = g_iota; + + s390_pcihost_iommu_configure(pbdev, true); + return 0; } @@ -697,6 +700,8 @@ static void dereg_ioat(S390PCIBusDevice *pbdev) pbdev->pba = 0; pbdev->pal = 0; pbdev->g_iota = 0; + + s390_pcihost_iommu_configure(pbdev, false); } int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index faba773592..5a52ff26eb 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -20,6 +20,7 @@ #include "qemu/config-file.h" #include "s390-pci-bus.h" #include "hw/s390x/storage-keys.h" +#include "hw/compat.h" #define TYPE_S390_CCW_MACHINE "s390-ccw-machine" @@ -103,8 +104,7 @@ void s390_memory_init(ram_addr_t mem_size) MemoryRegion *ram = g_new(MemoryRegion, 1); /* allocate RAM for core */ - memory_region_init_ram(ram, NULL, "s390.ram", mem_size, &error_fatal); - vmstate_register_ram_global(ram); + memory_region_allocate_system_memory(ram, NULL, "s390.ram", mem_size); memory_region_add_subregion(sysmem, 0, ram); /* Initialize storage key device */ @@ -236,6 +236,7 @@ static const TypeInfo ccw_machine_info = { }; #define CCW_COMPAT_2_4 \ + HW_COMPAT_2_4 \ {\ .driver = TYPE_S390_SKEYS,\ .property = "migration-enabled",\ diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index cbde9772e5..ae55760d64 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -31,9 +31,9 @@ #include "hw/boards.h" #include "hw/loader.h" #include "hw/virtio/virtio.h" -#include "hw/sysbus.h" #include "sysemu/kvm.h" #include "exec/address-spaces.h" +#include "sysemu/qtest.h" #include "hw/s390x/s390-virtio-bus.h" #include "hw/s390x/sclp.h" @@ -151,9 +151,9 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *firmware, bool enforce_bios) { - DeviceState *dev; + Object *new = object_new(TYPE_S390_IPL); + DeviceState *dev = DEVICE(new); - dev = qdev_create(NULL, "s390-ipl"); if (kernel_filename) { qdev_prop_set_string(dev, "kernel", kernel_filename); } @@ -163,8 +163,9 @@ void s390_init_ipl_dev(const char *kernel_filename, qdev_prop_set_string(dev, "cmdline", kernel_cmdline); qdev_prop_set_string(dev, "firmware", firmware); qdev_prop_set_bit(dev, "enforce_bios", enforce_bios); - object_property_add_child(qdev_get_machine(), "s390-ipl", - OBJECT(dev), NULL); + object_property_add_child(qdev_get_machine(), TYPE_S390_IPL, + new, NULL); + object_unref(new); qdev_init_nofail(dev); } @@ -268,6 +269,12 @@ static void s390_init(MachineState *machine) hwaddr virtio_region_len; hwaddr virtio_region_start; + if (!qtest_enabled()) { + error_printf("WARNING\n" + "The s390-virtio machine (non-ccw) is deprecated.\n" + "It will be removed in 2.6. Please use s390-ccw-virtio\n"); + } + if (machine->ram_slots) { error_report("Memory hotplug not supported by the selected machine."); exit(EXIT_FAILURE); @@ -334,7 +341,7 @@ static void s390_machine_class_init(ObjectClass *oc, void *data) NMIClass *nc = NMI_CLASS(oc); mc->alias = "s390"; - mc->desc = "VirtIO based S390 machine"; + mc->desc = "VirtIO based S390 machine (deprecated)"; mc->init = s390_init; mc->reset = s390_machine_reset; mc->block_default_type = IF_VIRTIO; diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index fb103b78ac..63da303864 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -1555,6 +1555,17 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) d->hotplugged, 1); } +static void virtio_ccw_post_plugged(DeviceState *d, Error **errp) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + VirtIODevice *vdev = virtio_bus_get_device(&dev->bus); + + if (!virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1)) { + /* A backend didn't support modern virtio. */ + dev->max_rev = 0; + } +} + static void virtio_ccw_device_unplugged(DeviceState *d) { VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); @@ -1891,6 +1902,7 @@ static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) k->save_config = virtio_ccw_save_config; k->load_config = virtio_ccw_load_config; k->device_plugged = virtio_ccw_device_plugged; + k->post_plugged = virtio_ccw_post_plugged; k->device_unplugged = virtio_ccw_device_unplugged; } diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index dcd724e6a5..d7dc6672ec 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -757,7 +757,7 @@ static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd) memcpy(info.product_name, base_class->product_name, 24); snprintf(info.serial_number, 32, "%s", s->hba_serial); - snprintf(info.package_version, 0x60, "%s-QEMU", QEMU_VERSION); + snprintf(info.package_version, 0x60, "%s-QEMU", qemu_hw_version()); memcpy(info.image_component[0].name, "APP", 3); snprintf(info.image_component[0].version, 10, "%s-QEMU", base_class->product_version); diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index d373c1b676..fd1171e481 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -453,7 +453,7 @@ static bool scsi_target_emulate_inquiry(SCSITargetReq *r) r->buf[7] = 0x10 | (r->req.bus->info->tcq ? 0x02 : 0); /* Sync, TCQ. */ memcpy(&r->buf[8], "QEMU ", 8); memcpy(&r->buf[16], "QEMU TARGET ", 16); - pstrcpy((char *) &r->buf[32], 4, qemu_get_version()); + pstrcpy((char *) &r->buf[32], 4, qemu_hw_version()); } return true; } diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index bada9a7f62..4797d83683 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -90,7 +90,7 @@ struct SCSIDiskState bool tray_locked; }; -static int scsi_handle_rw_error(SCSIDiskReq *r, int error); +static int scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed); static void scsi_free_request(SCSIRequest *req) { @@ -169,18 +169,18 @@ static void scsi_aio_complete(void *opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); if (r->req.io_canceled) { scsi_req_cancel_complete(&r->req); goto done; } if (ret < 0) { - if (scsi_handle_rw_error(r, -ret)) { + if (scsi_handle_rw_error(r, -ret, true)) { goto done; } } + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); scsi_req_complete(&r->req, GOOD); done: @@ -247,7 +247,7 @@ static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) } if (ret < 0) { - if (scsi_handle_rw_error(r, -ret)) { + if (scsi_handle_rw_error(r, -ret, false)) { goto done; } } @@ -273,7 +273,11 @@ static void scsi_dma_complete(void *opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } scsi_dma_complete_noio(r, ret); } @@ -285,18 +289,18 @@ static void scsi_read_complete(void * opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); if (r->req.io_canceled) { scsi_req_cancel_complete(&r->req); goto done; } if (ret < 0) { - if (scsi_handle_rw_error(r, -ret)) { + if (scsi_handle_rw_error(r, -ret, true)) { goto done; } } + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size); n = r->qiov.size / 512; @@ -322,7 +326,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) } if (ret < 0) { - if (scsi_handle_rw_error(r, -ret)) { + if (scsi_handle_rw_error(r, -ret, false)) { goto done; } } @@ -355,7 +359,11 @@ static void scsi_do_read_cb(void *opaque, int ret) assert (r->req.aiocb != NULL); r->req.aiocb = NULL; - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } scsi_do_read(opaque, ret); } @@ -407,7 +415,7 @@ static void scsi_read_data(SCSIRequest *req) * scsi_handle_rw_error always manages its reference counts, independent * of the return value. */ -static int scsi_handle_rw_error(SCSIDiskReq *r, int error) +static int scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) { bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); @@ -415,6 +423,9 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) is_read, error); if (action == BLOCK_ERROR_ACTION_REPORT) { + if (acct_failed) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } switch (error) { case ENOMEDIUM: scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); @@ -452,7 +463,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) } if (ret < 0) { - if (scsi_handle_rw_error(r, -ret)) { + if (scsi_handle_rw_error(r, -ret, false)) { goto done; } } @@ -481,7 +492,11 @@ static void scsi_write_complete(void * opaque, int ret) assert (r->req.aiocb != NULL); r->req.aiocb = NULL; - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } scsi_write_complete_noio(r, ret); } @@ -1592,7 +1607,7 @@ static void scsi_unmap_complete_noio(UnmapCBData *data, int ret) } if (ret < 0) { - if (scsi_handle_rw_error(r, -ret)) { + if (scsi_handle_rw_error(r, -ret, false)) { goto done; } } @@ -1696,18 +1711,19 @@ static void scsi_write_same_complete(void *opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); if (r->req.io_canceled) { scsi_req_cancel_complete(&r->req); goto done; } if (ret < 0) { - if (scsi_handle_rw_error(r, -ret)) { + if (scsi_handle_rw_error(r, -ret, true)) { goto done; } } + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + data->nb_sectors -= data->iov.iov_len / 512; data->sector += data->iov.iov_len / 512; data->iov.iov_len = MIN(data->nb_sectors * 512, data->iov.iov_len); @@ -2315,7 +2331,7 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) } if (!s->version) { - s->version = g_strdup(qemu_get_version()); + s->version = g_strdup(qemu_hw_version()); } if (!s->vendor) { s->vendor = g_strdup("QEMU"); diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 20885fb7f1..3a4f520fbb 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -205,20 +205,8 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq) assert(n < vs->conf.num_queues); req = virtio_scsi_init_req(s, vs->cmd_vqs[n]); qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem)); - /* TODO: add a way for SCSIBusInfo's load_request to fail, - * and fail migration instead of asserting here. - * When we do, we might be able to re-enable NDEBUG below. - */ -#ifdef NDEBUG -#error building with NDEBUG is not supported -#endif - assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg)); - assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg)); - virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr, - req->elem.in_num, 1); - virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr, - req->elem.out_num, 0); + virtqueue_map(&req->elem); if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size, sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 0) { @@ -262,7 +250,7 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) int target; int ret = 0; - if (s->dataplane_started) { + if (s->dataplane_started && d) { assert(blk_get_aio_context(d->conf.blk) == s->ctx); } /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */ @@ -665,6 +653,11 @@ static void virtio_scsi_reset(VirtIODevice *vdev) static void virtio_scsi_save(QEMUFile *f, void *opaque) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + + if (s->dataplane_started) { + virtio_scsi_dataplane_stop(s); + } virtio_save(vdev, f); } diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c index 2209ef1d52..b430d56687 100644 --- a/hw/sd/milkymist-memcard.c +++ b/hw/sd/milkymist-memcard.c @@ -28,7 +28,7 @@ #include "qemu/error-report.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" -#include "hw/sd.h" +#include "hw/sd/sd.h" enum { ENABLE_CMD_TX = (1<<0), diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c index 35d8033402..5bc47193f9 100644 --- a/hw/sd/omap_mmc.c +++ b/hw/sd/omap_mmc.c @@ -18,7 +18,7 @@ */ #include "hw/hw.h" #include "hw/arm/omap.h" -#include "hw/sd.h" +#include "hw/sd/sd.h" struct omap_mmc_s { qemu_irq irq; diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c index 5242176a33..326c53ad92 100644 --- a/hw/sd/pl181.c +++ b/hw/sd/pl181.c @@ -10,7 +10,7 @@ #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "hw/sysbus.h" -#include "hw/sd.h" +#include "hw/sd/sd.h" //#define DEBUG_PL181 1 diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c index d1fe6d58e8..b217080075 100644 --- a/hw/sd/pxa2xx_mmci.c +++ b/hw/sd/pxa2xx_mmci.c @@ -12,7 +12,7 @@ #include "hw/hw.h" #include "hw/arm/pxa.h" -#include "hw/sd.h" +#include "hw/sd/sd.h" #include "hw/qdev.h" struct PXA2xxMMCIState { diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 393a75c0bc..ce4d44be91 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -31,7 +31,7 @@ #include "hw/hw.h" #include "sysemu/block-backend.h" -#include "hw/sd.h" +#include "hw/sd/sd.h" #include "qemu/bitmap.h" //#define DEBUG_SD 1 diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci-internal.h index e2de92d553..c712daf4ee 100644 --- a/hw/sd/sdhci.h +++ b/hw/sd/sdhci-internal.h @@ -21,15 +21,10 @@ * You should have received a copy of the GNU General Public License along * with this program; if not, see <http://www.gnu.org/licenses/>. */ +#ifndef SDHCI_INTERNAL_H +#define SDHCI_INTERNAL_H -#ifndef SDHCI_H -#define SDHCI_H - -#include "qemu-common.h" -#include "hw/block/block.h" -#include "hw/pci/pci.h" -#include "hw/sysbus.h" -#include "hw/sd.h" +#include "hw/sd/sdhci.h" /* R/W SDMA System Address register 0x0 */ #define SDHC_SYSAD 0x00 @@ -232,66 +227,6 @@ enum { sdhc_gap_write = 2 /* SDHC stopped at block gap during write operation */ }; -/* SD/MMC host controller state */ -typedef struct SDHCIState { - union { - PCIDevice pcidev; - SysBusDevice busdev; - }; - SDState *card; - MemoryRegion iomem; - BlockConf conf; - - QEMUTimer *insert_timer; /* timer for 'changing' sd card. */ - QEMUTimer *transfer_timer; - qemu_irq eject_cb; - qemu_irq ro_cb; - qemu_irq irq; - - uint32_t sdmasysad; /* SDMA System Address register */ - uint16_t blksize; /* Host DMA Buff Boundary and Transfer BlkSize Reg */ - uint16_t blkcnt; /* Blocks count for current transfer */ - uint32_t argument; /* Command Argument Register */ - uint16_t trnmod; /* Transfer Mode Setting Register */ - uint16_t cmdreg; /* Command Register */ - uint32_t rspreg[4]; /* Response Registers 0-3 */ - uint32_t prnsts; /* Present State Register */ - uint8_t hostctl; /* Host Control Register */ - uint8_t pwrcon; /* Power control Register */ - uint8_t blkgap; /* Block Gap Control Register */ - uint8_t wakcon; /* WakeUp Control Register */ - uint16_t clkcon; /* Clock control Register */ - uint8_t timeoutcon; /* Timeout Control Register */ - uint8_t admaerr; /* ADMA Error Status Register */ - uint16_t norintsts; /* Normal Interrupt Status Register */ - uint16_t errintsts; /* Error Interrupt Status Register */ - uint16_t norintstsen; /* Normal Interrupt Status Enable Register */ - uint16_t errintstsen; /* Error Interrupt Status Enable Register */ - uint16_t norintsigen; /* Normal Interrupt Signal Enable Register */ - uint16_t errintsigen; /* Error Interrupt Signal Enable Register */ - uint16_t acmd12errsts; /* Auto CMD12 error status register */ - uint64_t admasysaddr; /* ADMA System Address Register */ - - uint32_t capareg; /* Capabilities Register */ - uint32_t maxcurr; /* Maximum Current Capabilities Register */ - uint8_t *fifo_buffer; /* SD host i/o FIFO buffer */ - uint32_t buf_maxsz; - uint16_t data_count; /* current element in FIFO buffer */ - uint8_t stopped_state;/* Current SDHC state */ - /* Buffer Data Port Register - virtual access point to R and W buffers */ - /* Software Reset Register - always reads as 0 */ - /* Force Event Auto CMD12 Error Interrupt Reg - write only */ - /* Force Event Error Interrupt Register- write only */ - /* RO Host Controller Version Register always reads as 0x2401 */ -} SDHCIState; - extern const VMStateDescription sdhci_vmstate; -#define TYPE_PCI_SDHCI "sdhci-pci" -#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI) - -#define TYPE_SYSBUS_SDHCI "generic-sdhci" -#define SYSBUS_SDHCI(obj) \ - OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI) - -#endif /* SDHCI_H */ +#endif diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 7f73527d44..d70d1a6ab9 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -29,8 +29,7 @@ #include "sysemu/dma.h" #include "qemu/timer.h" #include "qemu/bitops.h" - -#include "sdhci.h" +#include "sdhci-internal.h" /* host controller debug messages */ #ifndef SDHC_DEBUG diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c index e4b2d4f83b..c49ff62f56 100644 --- a/hw/sd/ssi-sd.c +++ b/hw/sd/ssi-sd.c @@ -13,7 +13,7 @@ #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "hw/ssi.h" -#include "hw/sd.h" +#include "hw/sd/sd.h" //#define DEBUG_SSI_SD 1 diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index 3037bef72e..7f0391c786 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -116,12 +116,12 @@ static uint32_t timer_enabled(HPETTimer *t) static uint32_t hpet_time_after(uint64_t a, uint64_t b) { - return ((int32_t)(b) - (int32_t)(a) < 0); + return ((int32_t)(b - a) < 0); } static uint32_t hpet_time_after64(uint64_t a, uint64_t b) { - return ((int64_t)(b) - (int64_t)(a) < 0); + return ((int64_t)(b - a) < 0); } static uint64_t ticks_to_ns(uint64_t value) diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c index 0806b5f82e..ff073d501a 100644 --- a/hw/tpm/tpm_tis.c +++ b/hw/tpm/tpm_tis.c @@ -141,7 +141,7 @@ #define TPM_TIS_IFACE_ID_SUPPORTED_FLAGS1_3 \ (TPM_TIS_IFACE_ID_INTERFACE_TIS1_3 | \ - (~0 << 4)/* all of it is don't care */) + (~0u << 4)/* all of it is don't care */) /* if backend was a TPM 2.0: */ #define TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0 \ diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c index 72329ed7d7..869a63c5b7 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c @@ -166,7 +166,7 @@ static void emulated_push_event(EmulatedState *card, EmulEvent *event) static void emulated_push_type(EmulatedState *card, uint32_t type) { - EmulEvent *event = (EmulEvent *)g_malloc(sizeof(EmulEvent)); + EmulEvent *event = g_new(EmulEvent, 1); assert(event); event->p.gen.type = type; @@ -175,7 +175,7 @@ static void emulated_push_type(EmulatedState *card, uint32_t type) static void emulated_push_error(EmulatedState *card, uint64_t code) { - EmulEvent *event = (EmulEvent *)g_malloc(sizeof(EmulEvent)); + EmulEvent *event = g_new(EmulEvent, 1); assert(event); event->p.error.type = EMUL_ERROR; diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index 809b1cb118..a2762679eb 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -359,8 +359,7 @@ static void usb_mtp_object_readdir(MTPState *s, MTPObject *o) } while ((entry = readdir(dir)) != NULL) { if ((o->nchildren % 32) == 0) { - o->children = g_realloc(o->children, - (o->nchildren + 32) * sizeof(MTPObject *)); + o->children = g_renew(MTPObject *, o->children, o->nchildren + 32); } o->children[o->nchildren] = usb_mtp_object_alloc(s, s->next_handle++, o, entry->d_name); diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 64a54c6e84..4e2161b5a7 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -726,7 +726,7 @@ static void ehci_detach(USBPort *port) ehci_queues_rip_device(s, port->dev, 0); ehci_queues_rip_device(s, port->dev, 1); - *portsc &= ~(PORTSC_CONNECT|PORTSC_PED); + *portsc &= ~(PORTSC_CONNECT|PORTSC_PED|PORTSC_SUSPEND); *portsc |= PORTSC_CSC; ehci_raise_irq(s, USBSTS_PCD); diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 1c57e20e70..268ab36468 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -2188,7 +2188,7 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, xfer->trbs = NULL; } if (!xfer->trbs) { - xfer->trbs = g_malloc(sizeof(XHCITRB) * length); + xfer->trbs = g_new(XHCITRB, length); xfer->trb_alloced = length; } xfer->trb_count = length; diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 7695a97143..3f8e540bd2 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -1240,7 +1240,7 @@ static void usb_host_handle_control(USBDevice *udev, USBPacket *p, /* Fix up USB-3 ep0 maxpacket size to allow superspeed connected devices * to work redirected to a not superspeed capable hcd */ - if (udev->speed == USB_SPEED_SUPER && + if ((udev->speedmask & USB_SPEED_MASK_SUPER) && !(udev->port->speedmask & USB_SPEED_MASK_SUPER) && request == 0x8006 && value == 0x100 && index == 0) { r->usb3ep0quirk = true; diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 38086cd0f2..30ff742730 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -322,7 +322,7 @@ static void packet_id_queue_add(struct PacketIdQueue *q, uint64_t id) DPRINTF("adding packet id %"PRIu64" to %s queue\n", id, q->name); - e = g_malloc0(sizeof(struct PacketIdQueueEntry)); + e = g_new0(struct PacketIdQueueEntry, 1); e->id = id; QTAILQ_INSERT_TAIL(&q->head, e, next); q->size++; @@ -468,7 +468,7 @@ static void bufp_alloc(USBRedirDevice *dev, uint8_t *data, uint16_t len, dev->endpoint[EP2I(ep)].bufpq_dropping_packets = 0; } - bufp = g_malloc(sizeof(struct buf_packet)); + bufp = g_new(struct buf_packet, 1); bufp->data = data; bufp->len = len; bufp->offset = 0; @@ -2234,7 +2234,7 @@ static int usbredir_get_bufpq(QEMUFile *f, void *priv, size_t unused) endp->bufpq_size = qemu_get_be32(f); for (i = 0; i < endp->bufpq_size; i++) { - bufp = g_malloc(sizeof(struct buf_packet)); + bufp = g_new(struct buf_packet, 1); bufp->len = qemu_get_be32(f); bufp->status = qemu_get_be32(f); bufp->offset = 0; diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index c675d1bc30..30c68a1e2b 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -284,7 +284,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) } quirk = g_malloc0(sizeof(*quirk)); - quirk->mem = g_malloc0(sizeof(MemoryRegion)); + quirk->mem = g_new0(MemoryRegion, 1); quirk->nr_mem = 1; memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev, @@ -319,7 +319,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->mem = g_malloc0(sizeof(MemoryRegion) * 2); + quirk->mem = g_new0(MemoryRegion, 2); quirk->nr_mem = 2; window = quirk->data = g_malloc0(sizeof(*window) + sizeof(VFIOConfigWindowMatch)); @@ -368,7 +368,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr) quirk = g_malloc0(sizeof(*quirk)); mirror = quirk->data = g_malloc0(sizeof(*mirror)); - mirror->mem = quirk->mem = g_malloc0(sizeof(MemoryRegion)); + mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); quirk->nr_mem = 1; mirror->vdev = vdev; mirror->offset = 0x4000; @@ -544,7 +544,7 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) quirk = g_malloc0(sizeof(*quirk)); quirk->data = data = g_malloc0(sizeof(*data)); - quirk->mem = g_malloc0(sizeof(MemoryRegion) * 2); + quirk->mem = g_new0(MemoryRegion, 2); quirk->nr_mem = 2; data->vdev = vdev; @@ -661,7 +661,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->mem = g_malloc0(sizeof(MemoryRegion) * 4); + quirk->mem = g_new0(MemoryRegion, 4); quirk->nr_mem = 4; bar5 = quirk->data = g_malloc0(sizeof(*bar5) + (sizeof(VFIOConfigWindowMatch) * 2)); @@ -756,7 +756,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) quirk = g_malloc0(sizeof(*quirk)); mirror = quirk->data = g_malloc0(sizeof(*mirror)); - mirror->mem = quirk->mem = g_malloc0(sizeof(MemoryRegion)); + mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); quirk->nr_mem = 1; mirror->vdev = vdev; mirror->offset = 0x88000; @@ -775,7 +775,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) if (vdev->has_vga) { quirk = g_malloc0(sizeof(*quirk)); mirror = quirk->data = g_malloc0(sizeof(*mirror)); - mirror->mem = quirk->mem = g_malloc0(sizeof(MemoryRegion)); + mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); quirk->nr_mem = 1; mirror->vdev = vdev; mirror->offset = 0x1800; @@ -938,7 +938,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->mem = g_malloc0(sizeof(MemoryRegion) * 2); + quirk->mem = g_new0(MemoryRegion, 2); quirk->nr_mem = 2; quirk->data = rtl = g_malloc0(sizeof(*rtl)); rtl->vdev = vdev; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 8fadbcf682..1fb868c244 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -28,6 +28,7 @@ #include "config.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" +#include "hw/pci/pci_bridge.h" #include "qemu/error-report.h" #include "qemu/range.h" #include "sysemu/kvm.h" @@ -586,7 +587,7 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) { vfio_disable_interrupts(vdev); - vdev->msi_vectors = g_malloc0(vdev->msix->entries * sizeof(VFIOMSIVector)); + vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries); vdev->interrupt = VFIO_INT_MSIX; @@ -622,7 +623,7 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); retry: - vdev->msi_vectors = g_malloc0(vdev->nr_vectors * sizeof(VFIOMSIVector)); + vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; @@ -1524,13 +1525,38 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size) } if (!pci_bus_is_express(vdev->pdev.bus)) { + PCIBus *bus = vdev->pdev.bus; + PCIDevice *bridge; + /* - * Use express capability as-is on PCI bus. It doesn't make much - * sense to even expose, but some drivers (ex. tg3) depend on it - * and guests don't seem to be particular about it. We'll need - * to revist this or force express devices to express buses if we - * ever expose an IOMMU to the guest. + * Traditionally PCI device assignment exposes the PCIe capability + * as-is on non-express buses. The reason being that some drivers + * simply assume that it's there, for example tg3. However when + * we're running on a native PCIe machine type, like Q35, we need + * to hide the PCIe capability. The reason for this is twofold; + * first Windows guests get a Code 10 error when the PCIe capability + * is exposed in this configuration. Therefore express devices won't + * work at all unless they're attached to express buses in the VM. + * Second, a native PCIe machine introduces the possibility of fine + * granularity IOMMUs supporting both translation and isolation. + * Guest code to discover the IOMMU visibility of a device, such as + * IOMMU grouping code on Linux, is very aware of device types and + * valid transitions between bus types. An express device on a non- + * express bus is not a valid combination on bare metal systems. + * + * Drivers that require a PCIe capability to make the device + * functional are simply going to need to have their devices placed + * on a PCIe bus in the VM. */ + while (!pci_bus_is_root(bus)) { + bridge = pci_bridge_get_device(bus); + bus = bridge->bus; + } + + if (pci_bus_is_express(bus)) { + return 0; + } + } else if (pci_bus_is_root(vdev->pdev.bus)) { /* * On a Root Complex bus Endpoints become Root Complex Integrated diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 5c1156ca3b..289b498ca9 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -478,7 +478,7 @@ static int vfio_populate_device(VFIODevice *vbasedev) struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; VFIORegion *ptr; - vdev->regions[i] = g_malloc0(sizeof(VFIORegion)); + vdev->regions[i] = g_new0(VFIORegion, 1); ptr = vdev->regions[i]; reg_info.index = i; ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 68f1994434..23f667ef4b 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -25,15 +25,30 @@ /* vring_map can be coupled with vring_unmap or (if you still have the * value returned in *mr) memory_region_unref. + * Returns NULL on failure. + * Callers that can handle a partial mapping must supply mapped_len pointer to + * get the actual length mapped. + * Passing mapped_len == NULL requires either a full mapping or a failure. */ -static void *vring_map(MemoryRegion **mr, hwaddr phys, hwaddr len, +static void *vring_map(MemoryRegion **mr, hwaddr phys, + hwaddr len, hwaddr *mapped_len, bool is_write) { MemoryRegionSection section = memory_region_find(get_system_memory(), phys, len); + uint64_t size; - if (!section.mr || int128_get64(section.size) < len) { + if (!section.mr) { goto out; } + + size = int128_get64(section.size); + assert(size); + + /* Passing mapped_len == NULL requires either a full mapping or a failure. */ + if (!mapped_len && size < len) { + goto out; + } + if (is_write && section.readonly) { goto out; } @@ -46,6 +61,10 @@ static void *vring_map(MemoryRegion **mr, hwaddr phys, hwaddr len, goto out; } + if (mapped_len) { + *mapped_len = MIN(size, len); + } + *mr = section.mr; return memory_region_get_ram_ptr(section.mr) + section.offset_within_region; @@ -78,7 +97,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n) addr = virtio_queue_get_desc_addr(vdev, n); size = virtio_queue_get_desc_size(vdev, n); /* Map the descriptor area as read only */ - ptr = vring_map(&vring->mr_desc, addr, size, false); + ptr = vring_map(&vring->mr_desc, addr, size, NULL, false); if (!ptr) { error_report("Failed to map 0x%" HWADDR_PRIx " byte for vring desc " "at 0x%" HWADDR_PRIx, @@ -92,7 +111,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n) /* Add the size of the used_event_idx */ size += sizeof(uint16_t); /* Map the driver area as read only */ - ptr = vring_map(&vring->mr_avail, addr, size, false); + ptr = vring_map(&vring->mr_avail, addr, size, NULL, false); if (!ptr) { error_report("Failed to map 0x%" HWADDR_PRIx " byte for vring avail " "at 0x%" HWADDR_PRIx, @@ -106,7 +125,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n) /* Add the size of the avail_event_idx */ size += sizeof(uint16_t); /* Map the device area as read-write */ - ptr = vring_map(&vring->mr_used, addr, size, true); + ptr = vring_map(&vring->mr_used, addr, size, NULL, true); if (!ptr) { error_report("Failed to map 0x%" HWADDR_PRIx " byte for vring used " "at 0x%" HWADDR_PRIx, @@ -206,6 +225,7 @@ static int get_desc(Vring *vring, VirtQueueElement *elem, struct iovec *iov; hwaddr *addr; MemoryRegion *mr; + hwaddr len; if (desc->flags & VRING_DESC_F_WRITE) { num = &elem->in_num; @@ -224,26 +244,30 @@ static int get_desc(Vring *vring, VirtQueueElement *elem, } } - /* Stop for now if there are not enough iovecs available. */ - if (*num >= VIRTQUEUE_MAX_SIZE) { - error_report("Invalid SG num: %u", *num); - return -EFAULT; - } + while (desc->len) { + /* Stop for now if there are not enough iovecs available. */ + if (*num >= VIRTQUEUE_MAX_SIZE) { + error_report("Invalid SG num: %u", *num); + return -EFAULT; + } - /* TODO handle non-contiguous memory across region boundaries */ - iov->iov_base = vring_map(&mr, desc->addr, desc->len, - desc->flags & VRING_DESC_F_WRITE); - if (!iov->iov_base) { - error_report("Failed to map descriptor addr %#" PRIx64 " len %u", - (uint64_t)desc->addr, desc->len); - return -EFAULT; + iov->iov_base = vring_map(&mr, desc->addr, desc->len, &len, + desc->flags & VRING_DESC_F_WRITE); + if (!iov->iov_base) { + error_report("Failed to map descriptor addr %#" PRIx64 " len %u", + (uint64_t)desc->addr, desc->len); + return -EFAULT; + } + + /* The MemoryRegion is looked up again and unref'ed later, leave the + * ref in place. */ + (iov++)->iov_len = len; + *addr++ = desc->addr; + desc->len -= len; + desc->addr += len; + *num += 1; } - /* The MemoryRegion is looked up again and unref'ed later, leave the - * ref in place. */ - iov->iov_len = desc->len; - *addr = desc->addr; - *num += 1; return 0; } @@ -257,6 +281,21 @@ static void copy_in_vring_desc(VirtIODevice *vdev, host->next = virtio_lduw_p(vdev, &guest->next); } +static bool read_vring_desc(VirtIODevice *vdev, + hwaddr guest, + struct vring_desc *host) +{ + if (address_space_read(&address_space_memory, guest, MEMTXATTRS_UNSPECIFIED, + (uint8_t *)host, sizeof *host)) { + return false; + } + host->addr = virtio_tswap64(vdev, host->addr); + host->len = virtio_tswap32(vdev, host->len); + host->flags = virtio_tswap16(vdev, host->flags); + host->next = virtio_tswap16(vdev, host->next); + return true; +} + /* This is stolen from linux/drivers/vhost/vhost.c. */ static int get_indirect(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem, struct vring_desc *indirect) @@ -284,23 +323,16 @@ static int get_indirect(VirtIODevice *vdev, Vring *vring, } do { - struct vring_desc *desc_ptr; - MemoryRegion *mr; - /* Translate indirect descriptor */ - desc_ptr = vring_map(&mr, - indirect->addr + found * sizeof(desc), - sizeof(desc), false); - if (!desc_ptr) { - error_report("Failed to map indirect descriptor " + if (!read_vring_desc(vdev, indirect->addr + found * sizeof(desc), + &desc)) { + error_report("Failed to read indirect descriptor " "addr %#" PRIx64 " len %zu", (uint64_t)indirect->addr + found * sizeof(desc), sizeof(desc)); vring->broken = true; return -EFAULT; } - copy_in_vring_desc(vdev, desc_ptr, &desc); - memory_region_unref(mr); /* Ensure descriptor has been loaded before accessing fields */ barrier(); /* read_barrier_depends(); */ diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 1d5f684eb0..b734a601a1 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -156,7 +156,7 @@ static int vhost_kernel_set_owner(struct vhost_dev *dev) static int vhost_kernel_reset_device(struct vhost_dev *dev) { - return vhost_kernel_call(dev, VHOST_RESET_DEVICE, NULL); + return vhost_kernel_call(dev, VHOST_RESET_OWNER, NULL); } static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 0aa8e0d93a..1b6c5ac238 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -43,7 +43,7 @@ typedef enum VhostUserRequest { VHOST_USER_GET_FEATURES = 1, VHOST_USER_SET_FEATURES = 2, VHOST_USER_SET_OWNER = 3, - VHOST_USER_RESET_DEVICE = 4, + VHOST_USER_RESET_OWNER = 4, VHOST_USER_SET_MEM_TABLE = 5, VHOST_USER_SET_LOG_BASE = 6, VHOST_USER_SET_LOG_FD = 7, @@ -75,6 +75,11 @@ typedef struct VhostUserMemory { VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + typedef struct VhostUserMsg { VhostUserRequest request; @@ -89,6 +94,7 @@ typedef struct VhostUserMsg { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserLog log; } payload; } QEMU_PACKED VhostUserMsg; @@ -115,8 +121,8 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { - error_report("Failed to read msg header. Read %d instead of %d.", r, - size); + error_report("Failed to read msg header. Read %d instead of %d." + " Original request %d.", r, size, msg->request); goto fail; } @@ -157,7 +163,7 @@ static bool vhost_user_one_time_request(VhostUserRequest request) { switch (request) { case VHOST_USER_SET_OWNER: - case VHOST_USER_RESET_DEVICE: + case VHOST_USER_RESET_OWNER: case VHOST_USER_SET_MEM_TABLE: case VHOST_USER_GET_QUEUE_NUM: return true; @@ -200,8 +206,9 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, VhostUserMsg msg = { .request = VHOST_USER_SET_LOG_BASE, .flags = VHOST_USER_VERSION, - .payload.u64 = base, - .size = sizeof(m.payload.u64), + .payload.log.mmap_size = log->size * sizeof(*(log->log)), + .payload.log.mmap_offset = 0, + .size = sizeof(msg.payload.log), }; if (shmfd && log->fd != -1) { @@ -265,8 +272,8 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, return -1; } - msg.size = sizeof(m.payload.memory.nregions); - msg.size += sizeof(m.payload.memory.padding); + msg.size = sizeof(msg.payload.memory.nregions); + msg.size += sizeof(msg.payload.memory.padding); msg.size += fd_num * sizeof(VhostUserMemoryRegion); vhost_user_write(dev, &msg, fds, fd_num); @@ -281,7 +288,7 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev, .request = VHOST_USER_SET_VRING_ADDR, .flags = VHOST_USER_VERSION, .payload.addr = *addr, - .size = sizeof(*addr), + .size = sizeof(msg.payload.addr), }; vhost_user_write(dev, &msg, NULL, 0); @@ -304,7 +311,7 @@ static int vhost_set_vring(struct vhost_dev *dev, .request = request, .flags = VHOST_USER_VERSION, .payload.state = *ring, - .size = sizeof(*ring), + .size = sizeof(msg.payload.state), }; vhost_user_write(dev, &msg, NULL, 0); @@ -326,18 +333,23 @@ static int vhost_user_set_vring_base(struct vhost_dev *dev, static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) { - struct vhost_vring_state state = { - .index = dev->vq_index, - .num = enable, - }; + int i; - if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { + if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { return -1; } - return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); -} + for (i = 0; i < dev->nvqs; ++i) { + struct vhost_vring_state state = { + .index = dev->vq_index + i, + .num = enable, + }; + vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); + } + + return 0; +} static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) @@ -346,7 +358,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, .request = VHOST_USER_GET_VRING_BASE, .flags = VHOST_USER_VERSION, .payload.state = *ring, - .size = sizeof(*ring), + .size = sizeof(msg.payload.state), }; vhost_user_write(dev, &msg, NULL, 0); @@ -361,7 +373,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, return -1; } - if (msg.size != sizeof(m.payload.state)) { + if (msg.size != sizeof(msg.payload.state)) { error_report("Received bad msg size."); return -1; } @@ -381,7 +393,7 @@ static int vhost_set_vring_file(struct vhost_dev *dev, .request = request, .flags = VHOST_USER_VERSION, .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, - .size = sizeof(m.payload.u64), + .size = sizeof(msg.payload.u64), }; if (ioeventfd_enabled() && file->fd > 0) { @@ -413,7 +425,7 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) .request = request, .flags = VHOST_USER_VERSION, .payload.u64 = u64, - .size = sizeof(m.payload.u64), + .size = sizeof(msg.payload.u64), }; vhost_user_write(dev, &msg, NULL, 0); @@ -456,7 +468,7 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) return -1; } - if (msg.size != sizeof(m.payload.u64)) { + if (msg.size != sizeof(msg.payload.u64)) { error_report("Received bad msg size."); return -1; } @@ -486,7 +498,7 @@ static int vhost_user_set_owner(struct vhost_dev *dev) static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { - .request = VHOST_USER_RESET_DEVICE, + .request = VHOST_USER_RESET_OWNER, .flags = VHOST_USER_VERSION, }; @@ -592,7 +604,7 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) msg.request = VHOST_USER_SEND_RARP; msg.flags = VHOST_USER_VERSION; memcpy((char *)&msg.payload.u64, mac_addr, 6); - msg.size = sizeof(m.payload.u64); + msg.size = sizeof(msg.payload.u64); err = vhost_user_write(dev, &msg, NULL, 0); return err; diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index c419b17143..9671635e63 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -37,9 +37,11 @@ static void balloon_page(void *addr, int deflate) { #if defined(__linux__) - if (!kvm_enabled() || kvm_has_sync_mmu()) + if (!qemu_balloon_is_inhibited() && (!kvm_enabled() || + kvm_has_sync_mmu())) { qemu_madvise(addr, TARGET_PAGE_SIZE, deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED); + } #endif } diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index febda76b94..81c7cdd507 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -56,6 +56,9 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) assert(vdc->get_features != NULL); vdev->host_features = vdc->get_features(vdev, vdev->host_features, errp); + if (klass->post_plugged != NULL) { + klass->post_plugged(qbus->parent, errp); + } } /* Reset the virtio_bus */ diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index f55dd2bf8e..94667e6256 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -86,6 +86,129 @@ static void virtio_pci_save_config(DeviceState *d, QEMUFile *f) qemu_put_be16(f, vdev->config_vector); } +static void virtio_pci_load_modern_queue_state(VirtIOPCIQueue *vq, + QEMUFile *f) +{ + vq->num = qemu_get_be16(f); + vq->enabled = qemu_get_be16(f); + vq->desc[0] = qemu_get_be32(f); + vq->desc[1] = qemu_get_be32(f); + vq->avail[0] = qemu_get_be32(f); + vq->avail[1] = qemu_get_be32(f); + vq->used[0] = qemu_get_be32(f); + vq->used[1] = qemu_get_be32(f); +} + +static bool virtio_pci_has_extra_state(DeviceState *d) +{ + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + + return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA; +} + +static int get_virtio_pci_modern_state(QEMUFile *f, void *pv, size_t size) +{ + VirtIOPCIProxy *proxy = pv; + int i; + + proxy->dfselect = qemu_get_be32(f); + proxy->gfselect = qemu_get_be32(f); + proxy->guest_features[0] = qemu_get_be32(f); + proxy->guest_features[1] = qemu_get_be32(f); + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + virtio_pci_load_modern_queue_state(&proxy->vqs[i], f); + } + + return 0; +} + +static void virtio_pci_save_modern_queue_state(VirtIOPCIQueue *vq, + QEMUFile *f) +{ + qemu_put_be16(f, vq->num); + qemu_put_be16(f, vq->enabled); + qemu_put_be32(f, vq->desc[0]); + qemu_put_be32(f, vq->desc[1]); + qemu_put_be32(f, vq->avail[0]); + qemu_put_be32(f, vq->avail[1]); + qemu_put_be32(f, vq->used[0]); + qemu_put_be32(f, vq->used[1]); +} + +static void put_virtio_pci_modern_state(QEMUFile *f, void *pv, size_t size) +{ + VirtIOPCIProxy *proxy = pv; + int i; + + qemu_put_be32(f, proxy->dfselect); + qemu_put_be32(f, proxy->gfselect); + qemu_put_be32(f, proxy->guest_features[0]); + qemu_put_be32(f, proxy->guest_features[1]); + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + virtio_pci_save_modern_queue_state(&proxy->vqs[i], f); + } +} + +static const VMStateInfo vmstate_info_virtio_pci_modern_state = { + .name = "virtqueue_state", + .get = get_virtio_pci_modern_state, + .put = put_virtio_pci_modern_state, +}; + +static bool virtio_pci_modern_state_needed(void *opaque) +{ + VirtIOPCIProxy *proxy = opaque; + + return !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); +} + +static const VMStateDescription vmstate_virtio_pci_modern_state = { + .name = "virtio_pci/modern_state", + .version_id = 1, + .minimum_version_id = 1, + .needed = &virtio_pci_modern_state_needed, + .fields = (VMStateField[]) { + { + .name = "modern_state", + .version_id = 0, + .field_exists = NULL, + .size = 0, + .info = &vmstate_info_virtio_pci_modern_state, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_virtio_pci = { + .name = "virtio_pci", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_virtio_pci_modern_state, + NULL + } +}; + +static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f) +{ + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + + vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL); +} + +static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f) +{ + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + + return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1); +} + static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -133,6 +256,7 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) if (vector != VIRTIO_NO_VECTOR) { return msix_vector_use(&proxy->pci_dev, vector); } + return 0; } @@ -146,7 +270,10 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, EventNotifier *notifier = virtio_queue_get_host_notifier(vq); bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY); bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); + bool fast_mmio = kvm_ioeventfd_any_length_enabled(); + bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; MemoryRegion *modern_mr = &proxy->notify.mr; + MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr; MemoryRegion *legacy_mr = &proxy->bar; hwaddr modern_addr = QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * virtio_get_queue_index(vq); @@ -162,8 +289,17 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, } virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); if (modern) { - memory_region_add_eventfd(modern_mr, modern_addr, 2, - true, n, notifier); + if (fast_mmio) { + memory_region_add_eventfd(modern_mr, modern_addr, 0, + false, n, notifier); + } else { + memory_region_add_eventfd(modern_mr, modern_addr, 2, + false, n, notifier); + } + if (modern_pio) { + memory_region_add_eventfd(modern_notify_mr, 0, 2, + true, n, notifier); + } } if (legacy) { memory_region_add_eventfd(legacy_mr, legacy_addr, 2, @@ -171,8 +307,17 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, } } else { if (modern) { - memory_region_del_eventfd(modern_mr, modern_addr, 2, - true, n, notifier); + if (fast_mmio) { + memory_region_del_eventfd(modern_mr, modern_addr, 0, + false, n, notifier); + } else { + memory_region_del_eventfd(modern_mr, modern_addr, 2, + false, n, notifier); + } + if (modern_pio) { + memory_region_del_eventfd(modern_notify_mr, 0, 2, + true, n, notifier); + } } if (legacy) { memory_region_del_eventfd(legacy_mr, legacy_addr, 2, @@ -1239,6 +1384,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, proxy->vqs[vdev->queue_sel].avail[0], ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | proxy->vqs[vdev->queue_sel].used[0]); + proxy->vqs[vdev->queue_sel].enabled = 1; break; case VIRTIO_PCI_COMMON_Q_DESCLO: proxy->vqs[vdev->queue_sel].desc[0] = val; @@ -1281,6 +1427,17 @@ static void virtio_pci_notify_write(void *opaque, hwaddr addr, } } +static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + VirtIODevice *vdev = opaque; + unsigned queue = val; + + if (queue < VIRTIO_QUEUE_MAX) { + virtio_queue_notify(vdev, queue); + } +} + static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr, unsigned size) { @@ -1374,6 +1531,16 @@ static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy) }, .endianness = DEVICE_LITTLE_ENDIAN, }; + static const MemoryRegionOps notify_pio_ops = { + .read = virtio_pci_notify_read, + .write = virtio_pci_notify_write_pio, + .impl = { + .min_access_size = 1, + .max_access_size = 4, + }, + .endianness = DEVICE_LITTLE_ENDIAN, + }; + memory_region_init_io(&proxy->common.mr, OBJECT(proxy), &common_ops, @@ -1398,30 +1565,60 @@ static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy) virtio_bus_get_device(&proxy->bus), "virtio-pci-notify", proxy->notify.size); + + memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy), + ¬ify_pio_ops, + virtio_bus_get_device(&proxy->bus), + "virtio-pci-notify-pio", + proxy->notify.size); } static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy, VirtIOPCIRegion *region, - struct virtio_pci_cap *cap) + struct virtio_pci_cap *cap, + MemoryRegion *mr, + uint8_t bar) { - memory_region_add_subregion(&proxy->modern_bar, - region->offset, - ®ion->mr); + memory_region_add_subregion(mr, region->offset, ®ion->mr); cap->cfg_type = region->type; - cap->bar = proxy->modern_mem_bar; + cap->bar = bar; cap->offset = cpu_to_le32(region->offset); cap->length = cpu_to_le32(region->size); virtio_pci_add_mem_cap(proxy, cap); + +} + +static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy, + VirtIOPCIRegion *region, + struct virtio_pci_cap *cap) +{ + virtio_pci_modern_region_map(proxy, region, cap, + &proxy->modern_bar, proxy->modern_mem_bar); +} + +static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy, + VirtIOPCIRegion *region, + struct virtio_pci_cap *cap) +{ + virtio_pci_modern_region_map(proxy, region, cap, + &proxy->io_bar, proxy->modern_io_bar); } -static void virtio_pci_modern_region_unmap(VirtIOPCIProxy *proxy, - VirtIOPCIRegion *region) +static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy, + VirtIOPCIRegion *region) { memory_region_del_subregion(&proxy->modern_bar, ®ion->mr); } +static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy, + VirtIOPCIRegion *region) +{ + memory_region_del_subregion(&proxy->io_bar, + ®ion->mr); +} + /* This is called by virtio-bus just after the device is plugged. */ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) { @@ -1429,6 +1626,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) VirtioBusState *bus = &proxy->bus; bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY); bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); + bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; uint8_t *config; uint32_t size; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); @@ -1467,16 +1665,31 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) .cap.cap_len = sizeof cfg, .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG, }; - struct virtio_pci_cfg_cap *cfg_mask; + struct virtio_pci_notify_cap notify_pio = { + .cap.cap_len = sizeof notify, + .notify_off_multiplier = cpu_to_le32(0x0), + }; - /* TODO: add io access for speed */ + struct virtio_pci_cfg_cap *cfg_mask; virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1); virtio_pci_modern_regions_init(proxy); - virtio_pci_modern_region_map(proxy, &proxy->common, &cap); - virtio_pci_modern_region_map(proxy, &proxy->isr, &cap); - virtio_pci_modern_region_map(proxy, &proxy->device, &cap); - virtio_pci_modern_region_map(proxy, &proxy->notify, ¬ify.cap); + + virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap); + virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap); + virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap); + virtio_pci_modern_mem_region_map(proxy, &proxy->notify, ¬ify.cap); + + if (modern_pio) { + memory_region_init(&proxy->io_bar, OBJECT(proxy), + "virtio-pci-io", 0x4); + + pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar, + PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar); + + virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio, + ¬ify_pio.cap); + } pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar, PCI_BASE_ADDRESS_SPACE_MEMORY | @@ -1532,14 +1745,18 @@ static void virtio_pci_device_unplugged(DeviceState *d) { VirtIOPCIProxy *proxy = VIRTIO_PCI(d); bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); + bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; virtio_pci_stop_ioeventfd(proxy); if (modern) { - virtio_pci_modern_region_unmap(proxy, &proxy->common); - virtio_pci_modern_region_unmap(proxy, &proxy->isr); - virtio_pci_modern_region_unmap(proxy, &proxy->device); - virtio_pci_modern_region_unmap(proxy, &proxy->notify); + virtio_pci_modern_mem_region_unmap(proxy, &proxy->common); + virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr); + virtio_pci_modern_mem_region_unmap(proxy, &proxy->device); + virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify); + if (modern_pio) { + virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio); + } } } @@ -1559,6 +1776,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) */ proxy->legacy_io_bar = 0; proxy->msix_bar = 1; + proxy->modern_io_bar = 2; proxy->modern_mem_bar = 4; proxy->common.offset = 0x0; @@ -1578,6 +1796,10 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * VIRTIO_QUEUE_MAX; proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG; + proxy->notify_pio.offset = 0x0; + proxy->notify_pio.size = 0x4; + proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG; + /* subclasses can enforce modern, so do this unconditionally */ memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci", 2 * QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * @@ -1592,6 +1814,29 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) address_space_init(&proxy->modern_as, &proxy->modern_cfg, "virtio-pci-cfg-as"); + if (pci_is_express(pci_dev) && pci_bus_is_express(pci_dev->bus) && + !pci_bus_is_root(pci_dev->bus)) { + int pos; + + pos = pcie_endpoint_cap_init(pci_dev, 0); + assert(pos > 0); + + pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF); + assert(pos > 0); + + /* + * Indicates that this function complies with revision 1.2 of the + * PCI Power Management Interface Specification. + */ + pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3); + } else { + /* + * make future invocations of pci_is_express() return false + * and pci_config_size() return PCI_CONFIG_SPACE_SIZE. + */ + pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS; + } + virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy); if (k->realize) { k->realize(proxy, errp); @@ -1610,9 +1855,15 @@ static void virtio_pci_reset(DeviceState *qdev) { VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); VirtioBusState *bus = VIRTIO_BUS(&proxy->bus); + int i; + virtio_pci_stop_ioeventfd(proxy); virtio_bus_reset(bus); msix_unuse_all_vectors(&proxy->pci_dev); + + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + proxy->vqs[i].enabled = 0; + } } static Property virtio_pci_properties[] = { @@ -1622,13 +1873,34 @@ static Property virtio_pci_properties[] = { VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT, false), DEFINE_PROP_BIT("disable-modern", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT, true), + DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), + DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), + DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), DEFINE_PROP_END_OF_LIST(), }; +static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) +{ + VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev); + VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); + PCIDevice *pci_dev = &proxy->pci_dev; + + if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) && + !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN)) { + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; + } + + vpciklass->parent_dc_realize(qdev, errp); +} + static void virtio_pci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass); dc->props = virtio_pci_properties; k->realize = virtio_pci_realize; @@ -1636,6 +1908,8 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data) k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = PCI_CLASS_OTHERS; + vpciklass->parent_dc_realize = dc->realize; + dc->realize = virtio_pci_dc_realize; dc->reset = virtio_pci_reset; } @@ -2212,6 +2486,9 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) k->load_config = virtio_pci_load_config; k->save_queue = virtio_pci_save_queue; k->load_queue = virtio_pci_load_queue; + k->save_extra_state = virtio_pci_save_extra_state; + k->load_extra_state = virtio_pci_load_extra_state; + k->has_extra_state = virtio_pci_has_extra_state; k->query_guest_notifiers = virtio_pci_query_guest_notifiers; k->set_host_notifier = virtio_pci_set_host_notifier; k->set_guest_notifiers = virtio_pci_set_guest_notifiers; diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 801c23aef3..a104ff2072 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -72,8 +72,19 @@ typedef struct VirtioBusClass VirtioPCIBusClass; /* virtio version flags */ #define VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT 2 #define VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT 3 +#define VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT 4 #define VIRTIO_PCI_FLAG_DISABLE_LEGACY (1 << VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT) #define VIRTIO_PCI_FLAG_DISABLE_MODERN (1 << VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT) +#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT) + +/* migrate extra state */ +#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT 4 +#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT) + +/* have pio notification for modern device ? */ +#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT 5 +#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ + (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) typedef struct { MSIMessage msg; @@ -94,6 +105,7 @@ typedef struct { typedef struct VirtioPCIClass { PCIDeviceClass parent_class; + DeviceRealize parent_dc_realize; void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp); } VirtioPCIClass; @@ -104,6 +116,14 @@ typedef struct VirtIOPCIRegion { uint32_t type; } VirtIOPCIRegion; +typedef struct VirtIOPCIQueue { + uint16_t num; + bool enabled; + uint32_t desc[2]; + uint32_t avail[2]; + uint32_t used[2]; +} VirtIOPCIQueue; + struct VirtIOPCIProxy { PCIDevice pci_dev; MemoryRegion bar; @@ -111,11 +131,14 @@ struct VirtIOPCIProxy { VirtIOPCIRegion isr; VirtIOPCIRegion device; VirtIOPCIRegion notify; + VirtIOPCIRegion notify_pio; MemoryRegion modern_bar; + MemoryRegion io_bar; MemoryRegion modern_cfg; AddressSpace modern_as; uint32_t legacy_io_bar; uint32_t msix_bar; + uint32_t modern_io_bar; uint32_t modern_mem_bar; int config_cap; uint32_t flags; @@ -124,13 +147,7 @@ struct VirtIOPCIProxy { uint32_t dfselect; uint32_t gfselect; uint32_t guest_features[2]; - struct { - uint16_t num; - bool enabled; - uint32_t desc[2]; - uint32_t avail[2]; - uint32_t used[2]; - } vqs[VIRTIO_QUEUE_MAX]; + VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX]; bool ioeventfd_disabled; bool ioeventfd_started; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index d0bc72e0e4..1edef5945d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -448,28 +448,59 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, return in_bytes <= in_total && out_bytes <= out_total; } -void virtqueue_map_sg(struct iovec *sg, hwaddr *addr, - size_t num_sg, int is_write) +static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr, + unsigned int *num_sg, unsigned int max_size, + int is_write) { unsigned int i; hwaddr len; - if (num_sg > VIRTQUEUE_MAX_SIZE) { - error_report("virtio: map attempt out of bounds: %zd > %d", - num_sg, VIRTQUEUE_MAX_SIZE); - exit(1); - } + /* Note: this function MUST validate input, some callers + * are passing in num_sg values received over the network. + */ + /* TODO: teach all callers that this can fail, and return failure instead + * of asserting here. + * When we do, we might be able to re-enable NDEBUG below. + */ +#ifdef NDEBUG +#error building with NDEBUG is not supported +#endif + assert(*num_sg <= max_size); - for (i = 0; i < num_sg; i++) { + for (i = 0; i < *num_sg; i++) { len = sg[i].iov_len; sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write); - if (sg[i].iov_base == NULL || len != sg[i].iov_len) { + if (!sg[i].iov_base) { error_report("virtio: error trying to map MMIO memory"); exit(1); } + if (len == sg[i].iov_len) { + continue; + } + if (*num_sg >= max_size) { + error_report("virtio: memory split makes iovec too large"); + exit(1); + } + memmove(sg + i + 1, sg + i, sizeof(*sg) * (*num_sg - i)); + memmove(addr + i + 1, addr + i, sizeof(*addr) * (*num_sg - i)); + assert(len < sg[i + 1].iov_len); + sg[i].iov_len = len; + addr[i + 1] += len; + sg[i + 1].iov_len -= len; + ++*num_sg; } } +void virtqueue_map(VirtQueueElement *elem) +{ + virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num, + MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)), + 1); + virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num, + MIN(ARRAY_SIZE(elem->out_sg), ARRAY_SIZE(elem->out_addr)), + 0); +} + int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) { unsigned int i, head, max; @@ -531,8 +562,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max); /* Now map what we have collected */ - virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1); - virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0); + virtqueue_map(elem); elem->index = head; @@ -1086,6 +1116,16 @@ static bool virtio_ringsize_needed(void *opaque) return false; } +static bool virtio_extra_state_needed(void *opaque) +{ + VirtIODevice *vdev = opaque; + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + + return k->has_extra_state && + k->has_extra_state(qbus->parent); +} + static void put_virtqueue_state(QEMUFile *f, void *pv, size_t size) { VirtIODevice *vdev = pv; @@ -1180,6 +1220,53 @@ static const VMStateDescription vmstate_virtio_ringsize = { } }; +static int get_extra_state(QEMUFile *f, void *pv, size_t size) +{ + VirtIODevice *vdev = pv; + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + + if (!k->load_extra_state) { + return -1; + } else { + return k->load_extra_state(qbus->parent, f); + } +} + +static void put_extra_state(QEMUFile *f, void *pv, size_t size) +{ + VirtIODevice *vdev = pv; + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + + k->save_extra_state(qbus->parent, f); +} + +static const VMStateInfo vmstate_info_extra_state = { + .name = "virtqueue_extra_state", + .get = get_extra_state, + .put = put_extra_state, +}; + +static const VMStateDescription vmstate_virtio_extra_state = { + .name = "virtio/extra_state", + .version_id = 1, + .minimum_version_id = 1, + .needed = &virtio_extra_state_needed, + .fields = (VMStateField[]) { + { + .name = "extra_state", + .version_id = 0, + .field_exists = NULL, + .size = 0, + .info = &vmstate_info_extra_state, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_device_endian = { .name = "virtio/device_endian", .version_id = 1, @@ -1215,6 +1302,7 @@ static const VMStateDescription vmstate_virtio = { &vmstate_virtio_64bit_features, &vmstate_virtio_virtqueues, &vmstate_virtio_ringsize, + &vmstate_virtio_extra_state, NULL } }; diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 0efee112fd..3d8686d550 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1937,7 +1937,7 @@ static int xen_pt_config_reg_init(XenPCIPassthroughState *s, break; case 4: rc = xen_host_pci_get_long(&s->real_device, offset, &val); break; - default: assert(1); + default: abort(); } if (rc) { /* Serious issues when we cannot read the host values! */ @@ -1982,7 +1982,7 @@ static int xen_pt_config_reg_init(XenPCIPassthroughState *s, break; case 4: pci_set_long(s->dev.config + offset, val); break; - default: assert(1); + default: abort(); } /* set register value pointer to the data. */ reg_entry->ptr.byte = s->dev.config + offset; diff --git a/hw/xenpv/xen_domainbuild.c b/hw/xenpv/xen_domainbuild.c index c0ab7537df..ac0e5ac9f0 100644 --- a/hw/xenpv/xen_domainbuild.c +++ b/hw/xenpv/xen_domainbuild.c @@ -234,7 +234,7 @@ int xen_domain_build_pv(const char *kernel, const char *ramdisk, int rc; memcpy(uuid, qemu_uuid, sizeof(uuid)); - rc = xc_domain_create(xen_xc, ssidref, uuid, flags, &xen_domid); + rc = xen_domain_create(xen_xc, ssidref, uuid, flags, &xen_domid); if (rc < 0) { fprintf(stderr, "xen: xc_domain_create() failed\n"); goto err; diff --git a/include/block/accounting.h b/include/block/accounting.h index 66637cdfed..0f46cb4ec1 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -2,6 +2,7 @@ * QEMU System Emulator block accounting * * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,8 +26,12 @@ #define BLOCK_ACCOUNTING_H #include <stdint.h> +#include <stdbool.h> #include "qemu/typedefs.h" +#include "qemu/timed-average.h" + +typedef struct BlockAcctTimedStats BlockAcctTimedStats; enum BlockAcctType { BLOCK_ACCT_READ, @@ -35,11 +40,23 @@ enum BlockAcctType { BLOCK_MAX_IOTYPE, }; +struct BlockAcctTimedStats { + TimedAverage latency[BLOCK_MAX_IOTYPE]; + unsigned interval_length; /* in seconds */ + QSLIST_ENTRY(BlockAcctTimedStats) entries; +}; + typedef struct BlockAcctStats { uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; uint64_t nr_ops[BLOCK_MAX_IOTYPE]; + uint64_t invalid_ops[BLOCK_MAX_IOTYPE]; + uint64_t failed_ops[BLOCK_MAX_IOTYPE]; uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; uint64_t merged[BLOCK_MAX_IOTYPE]; + int64_t last_access_time_ns; + QSLIST_HEAD(, BlockAcctTimedStats) intervals; + bool account_invalid; + bool account_failed; } BlockAcctStats; typedef struct BlockAcctCookie { @@ -48,10 +65,21 @@ typedef struct BlockAcctCookie { enum BlockAcctType type; } BlockAcctCookie; +void block_acct_init(BlockAcctStats *stats, bool account_invalid, + bool account_failed); +void block_acct_cleanup(BlockAcctStats *stats); +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length); +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s); void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type); void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type); void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, int num_requests); +int64_t block_acct_idle_time_ns(BlockAcctStats *stats); +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type); #endif diff --git a/include/block/aio.h b/include/block/aio.h index bcc7d43f6a..e086e3b4ee 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -124,6 +124,11 @@ struct AioContext { QEMUTimerListGroup tlg; int external_disable_cnt; + + /* epoll(7) state used when built with CONFIG_EPOLL */ + int epollfd; + bool epoll_enabled; + bool epoll_available; }; /** @@ -209,6 +214,11 @@ void aio_notify(AioContext *ctx); void aio_notify_accept(AioContext *ctx); /** + * aio_bh_call: Executes callback function of the specified BH. + */ +void aio_bh_call(QEMUBH *bh); + +/** * aio_bh_poll: Poll bottom halves for an AioContext. * * These are internal functions used by the QEMU main loop. @@ -401,6 +411,17 @@ static inline void aio_enable_external(AioContext *ctx) } /** + * aio_external_disabled: + * @ctx: the aio context + * + * Return true if the external clients are disabled. + */ +static inline bool aio_external_disabled(AioContext *ctx) +{ + return atomic_read(&ctx->external_disable_cnt); +} + +/** * aio_node_check: * @ctx: the aio context * @is_external: Whether or not the checked node is an external event source. @@ -413,4 +434,12 @@ static inline bool aio_node_check(AioContext *ctx, bool is_external) return !is_external || !atomic_read(&ctx->external_disable_cnt); } +/** + * aio_context_setup: + * @ctx: the aio context + * + * Initialize the aio context. + */ +void aio_context_setup(AioContext *ctx, Error **errp); + #endif diff --git a/include/block/block.h b/include/block/block.h index 610db923d5..3477328737 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -14,6 +14,7 @@ typedef struct BlockDriver BlockDriver; typedef struct BlockJob BlockJob; typedef struct BdrvChild BdrvChild; typedef struct BdrvChildRole BdrvChildRole; +typedef struct BlockJobTxn BlockJobTxn; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -60,7 +61,7 @@ typedef enum { * opened with BDRV_O_UNMAP. */ BDRV_REQ_MAY_UNMAP = 0x4, - BDRV_REQ_NO_COPY_ON_READ = 0x8, + BDRV_REQ_NO_SERIALISING = 0x8, } BdrvRequestFlags; typedef struct BlockSizes { @@ -247,7 +248,7 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); -int coroutine_fn bdrv_co_no_copy_on_readv(BlockDriverState *bs, +int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); @@ -335,10 +336,18 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb); typedef struct BlockRequest { /* Fields to be filled by multiwrite caller */ - int64_t sector; - int nb_sectors; - int flags; - QEMUIOVector *qiov; + union { + struct { + int64_t sector; + int nb_sectors; + int flags; + QEMUIOVector *qiov; + }; + struct { + int req; + void *buf; + }; + }; BlockCompletionFunc *cb; void *opaque; @@ -493,7 +502,6 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int nr_sectors); -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); diff --git a/include/block/block_int.h b/include/block/block_int.h index 3ceeb5a940..4012e36437 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -60,11 +60,19 @@ #define BLOCK_PROBE_BUF_SIZE 512 +enum BdrvTrackedRequestType { + BDRV_TRACKED_READ, + BDRV_TRACKED_WRITE, + BDRV_TRACKED_FLUSH, + BDRV_TRACKED_IOCTL, + BDRV_TRACKED_DISCARD, +}; + typedef struct BdrvTrackedRequest { BlockDriverState *bs; int64_t offset; unsigned int bytes; - bool is_write; + enum BdrvTrackedRequestType type; bool serialising; int64_t overlap_offset; @@ -219,7 +227,6 @@ struct BlockDriver { void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); /* to control generic scsi devices */ - int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf); BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); @@ -288,6 +295,12 @@ struct BlockDriver { */ int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + /** + * Drain and stop any internal sources of requests in the driver, and + * remain so until next I/O callback (e.g. bdrv_co_writev) is called. + */ + void (*bdrv_drain)(BlockDriverState *bs); + QLIST_ENTRY(BlockDriver) list; }; @@ -390,7 +403,10 @@ struct BlockDriverState { /* number of in-flight serialising requests */ unsigned int serialising_in_flight; - /* I/O throttling */ + /* I/O throttling. + * throttle_state tells us if this BDS has I/O limits configured. + * io_limits_enabled tells us if they are currently being + * enforced, but it can be temporarily set to false */ CoQueue throttled_reqs[2]; bool io_limits_enabled; /* The following fields are protected by the ThrottleGroup lock. @@ -473,6 +489,8 @@ extern BlockDriver bdrv_file; extern BlockDriver bdrv_raw; extern BlockDriver bdrv_qcow2; +extern QTAILQ_HEAD(BdrvStates, BlockDriverState) bdrv_states; + /** * bdrv_setup_io_funcs: * @@ -651,6 +669,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, * @on_target_error: The action to take upon error writing to the target. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. + * @txn: Transaction that this job is part of (may be NULL). * * Start a backup operation on @bs. Clusters in @bs are written to @target * until the job is cancelled or manually completed. @@ -661,7 +680,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockCompletionFunc *cb, void *opaque, - Error **errp); + BlockJobTxn *txn, Error **errp); void blk_set_bs(BlockBackend *blk, BlockDriverState *bs); @@ -675,4 +694,7 @@ void blk_dev_resize_cb(BlockBackend *blk); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); bool bdrv_requests_pending(BlockDriverState *bs); +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); + #endif /* BLOCK_INT_H */ diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 289b13f0c0..d84ccd8d2c 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -50,6 +50,26 @@ typedef struct BlockJobDriver { * manually. */ void (*complete)(BlockJob *job, Error **errp); + + /** + * If the callback is not NULL, it will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's + * completion if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*commit)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked when any job in the + * same transaction fails; or upon this job's failure (due to error or + * cancellation) if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*abort)(BlockJob *job); } BlockJobDriver; /** @@ -130,6 +150,21 @@ struct BlockJob { /** The opaque value that is passed to the completion function. */ void *opaque; + + /** Reference count of the block job */ + int refcnt; + + /* True if this job has reported completion by calling block_job_completed. + */ + bool completed; + + /* ret code passed to block_job_completed. + */ + int ret; + + /** Non-NULL if this job is part of a transaction */ + BlockJobTxn *txn; + QLIST_ENTRY(BlockJob) txn_list; }; /** @@ -174,12 +209,21 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); void block_job_yield(BlockJob *job); /** - * block_job_release: + * block_job_ref: * @bs: The block device. * - * Release job resources when an error occurred or job completed. + * Grab a reference to the block job. Should be paired with block_job_unref. */ -void block_job_release(BlockDriverState *bs); +void block_job_ref(BlockJob *job); + +/** + * block_job_unref: + * @bs: The block device. + * + * Release reference to the block job and release resources if it is the last + * reference. + */ +void block_job_unref(BlockJob *job); /** * block_job_completed: @@ -364,4 +408,39 @@ void block_job_defer_to_main_loop(BlockJob *job, BlockJobDeferToMainLoopFn *fn, void *opaque); +/** + * block_job_txn_new: + * + * Allocate and return a new block job transaction. Jobs can be added to the + * transaction using block_job_txn_add_job(). + * + * The transaction is automatically freed when the last job completes or is + * cancelled. + * + * All jobs in the transaction either complete successfully or fail/cancel as a + * group. Jobs wait for each other before completing. Cancelling one job + * cancels all jobs in the transaction. + */ +BlockJobTxn *block_job_txn_new(void); + +/** + * block_job_txn_unref: + * + * Release a reference that was previously acquired with block_job_txn_add_job + * or block_job_txn_new. If it's the last reference to the object, it will be + * freed. + */ +void block_job_txn_unref(BlockJobTxn *txn); + +/** + * block_job_txn_add_job: + * @txn: The transaction (may be NULL) + * @job: Job to add to the transaction + * + * Add @job to the transaction. The @job must not already be in a transaction. + * The caller must call either block_job_txn_unref() or block_job_completed() + * to release the reference that is automatically grabbed here. + */ +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job); + #endif diff --git a/include/block/snapshot.h b/include/block/snapshot.h index 770d9bbc8c..c6910da63a 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -63,9 +63,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id, const char *name, Error **errp); -void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, - const char *id_or_name, - Error **errp); +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp); int bdrv_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int bdrv_snapshot_load_tmp(BlockDriverState *bs, @@ -75,4 +75,22 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, const char *id_or_name, Error **errp); + + +/* Group operations. All block drivers are involved. + * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs); +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs, + Error **err); +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bsd_bs); +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs); +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs); + +BlockDriverState *bdrv_all_find_vmstate_bs(void); + #endif diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index f9998b9732..83b1781afc 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -174,11 +174,13 @@ extern unsigned long reserved_va; #define TARGET_PAGE_MASK ~(TARGET_PAGE_SIZE - 1) #define TARGET_PAGE_ALIGN(addr) (((addr) + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK) -/* ??? These should be the larger of uintptr_t and target_ulong. */ +/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even + * when intptr_t is 32-bit and we are aligning a long long. + */ extern uintptr_t qemu_real_host_page_size; -extern uintptr_t qemu_real_host_page_mask; +extern intptr_t qemu_real_host_page_mask; extern uintptr_t qemu_host_page_size; -extern uintptr_t qemu_host_page_mask; +extern intptr_t qemu_host_page_mask; #define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask) #define REAL_HOST_PAGE_ALIGN(addr) (((addr) + qemu_real_host_page_size - 1) & \ diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 9fb1d541d4..85aa4033e7 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -64,8 +64,12 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr); void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); /* This should not be used by devices. */ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); +RAMBlock *qemu_ram_block_by_name(const char *name); +RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, + ram_addr_t *ram_addr, ram_addr_t *offset); void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev); void qemu_ram_unset_idstr(ram_addr_t addr); +const char *qemu_ram_get_idstr(RAMBlock *rb); void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, int len, int is_write); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 9b93b9b47d..d900b0d078 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -72,7 +72,6 @@ void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb, void cpu_gen_init(void); bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc); -void page_size_init(void); void QEMU_NORETURN cpu_resume_from_signal(CPUState *cpu, void *puc); void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); @@ -190,6 +189,7 @@ struct TranslationBlock { #define CF_LAST_IO 0x8000 /* Last insn may be an IO access. */ #define CF_NOCACHE 0x10000 /* To be freed after execution */ #define CF_USE_ICOUNT 0x20000 +#define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */ void *tc_ptr; /* pointer to the translated code */ uint8_t *tc_search; /* pointer to search data */ diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 3360ac5fde..7115154bc1 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -22,8 +22,6 @@ #ifndef CONFIG_USER_ONLY #include "hw/xen/xen.h" -typedef struct RAMBlock RAMBlock; - struct RAMBlock { struct rcu_head rcu; struct MemoryRegion *mr; diff --git a/include/glib-compat.h b/include/glib-compat.h index fb25f437b4..03d8b12675 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -165,6 +165,14 @@ static inline GThread *g_thread_new(const char *name, #define CompatGCond GCond #endif /* glib 2.31 */ +#if !GLIB_CHECK_VERSION(2, 32, 0) +/* Beware, function returns gboolean since 2.39.2, see GLib commit 9101915 */ +static inline void g_hash_table_add(GHashTable *hash_table, gpointer key) +{ + g_hash_table_replace(hash_table, key, key); +} +#endif + #ifndef g_assert_true #define g_assert_true(expr) \ do { \ diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h index 01a189bcdc..6b32a99e21 100644 --- a/include/hw/arm/allwinner-a10.h +++ b/include/hw/arm/allwinner-a10.h @@ -7,6 +7,8 @@ #include "hw/timer/allwinner-a10-pit.h" #include "hw/intc/allwinner-a10-pic.h" #include "hw/net/allwinner_emac.h" +#include "hw/ide/pci.h" +#include "hw/ide/ahci.h" #include "sysemu/sysemu.h" #include "exec/address-spaces.h" @@ -16,6 +18,7 @@ #define AW_A10_PIT_REG_BASE 0x01c20c00 #define AW_A10_UART0_REG_BASE 0x01c28000 #define AW_A10_EMAC_BASE 0x01c0b000 +#define AW_A10_SATA_BASE 0x01c18000 #define AW_A10_SDRAM_BASE 0x40000000 @@ -32,6 +35,7 @@ typedef struct AwA10State { AwA10PITState timer; AwA10PICState intc; AwEmacState emac; + AllwinnerAHCIState sata; } AwA10State; #define ALLWINNER_H_ diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h index 4dcd4f9b63..c26b0e357f 100644 --- a/include/hw/arm/arm.h +++ b/include/hw/arm/arm.h @@ -17,7 +17,7 @@ #include "cpu.h" /* armv7m.c */ -qemu_irq *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, +DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model); /* @@ -87,6 +87,22 @@ struct arm_boot_info { * -pflash. It also implies that fw_cfg_find() will succeed. */ bool firmware_loaded; + + /* Address at which board specific loader/setup code exists. If enabled, + * this code-blob will run before anything else. It must return to the + * caller via the link register. There is no stack set up. Enabled by + * defining write_board_setup, which is responsible for loading the blob + * to the specified address. + */ + hwaddr board_setup_addr; + void (*write_board_setup)(ARMCPU *cpu, + const struct arm_boot_info *info); + + /* If set, the board specific loader/setup blob will be run from secure + * mode, regardless of secure_boot. The blob becomes responsible for + * changing to non-secure state if implementing a non-secure boot + */ + bool secure_board_setup; }; /** diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h index 4005a9960b..d1160920cc 100644 --- a/include/hw/arm/xlnx-zynqmp.h +++ b/include/hw/arm/xlnx-zynqmp.h @@ -24,6 +24,7 @@ #include "hw/char/cadence_uart.h" #include "hw/ide/pci.h" #include "hw/ide/ahci.h" +#include "hw/sd/sdhci.h" #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp" #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \ @@ -33,6 +34,7 @@ #define XLNX_ZYNQMP_NUM_RPU_CPUS 2 #define XLNX_ZYNQMP_NUM_GEMS 4 #define XLNX_ZYNQMP_NUM_UARTS 2 +#define XLNX_ZYNQMP_NUM_SDHCI 2 #define XLNX_ZYNQMP_NUM_OCM_BANKS 4 #define XLNX_ZYNQMP_OCM_RAM_0_ADDRESS 0xFFFC0000 @@ -63,6 +65,7 @@ typedef struct XlnxZynqMPState { CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS]; CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS]; SysbusAHCIState sata; + SDHCIState sdhci[XLNX_ZYNQMP_NUM_SDHCI]; char *boot_cpu; ARMCPU *boot_cpu_ptr; diff --git a/include/hw/boards.h b/include/hw/boards.h index 3e9a92c055..5da4fb00ee 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -33,7 +33,6 @@ MachineClass *find_default_machine(void); extern MachineState *current_machine; bool machine_usb(MachineState *machine); -bool machine_iommu(MachineState *machine); bool machine_kernel_irqchip_allowed(MachineState *machine); bool machine_kernel_irqchip_required(MachineState *machine); int machine_kvm_shadow_mem(MachineState *machine); @@ -52,6 +51,11 @@ bool machine_mem_merge(MachineState *machine); * used to provide @cpu_index to socket number mapping, allowing * a machine to group CPU threads belonging to the same socket/package * Returns: socket number given cpu_index belongs to. + * @hw_version: + * Value of QEMU_VERSION when the machine was added to QEMU. + * Set only by old machines because they need to keep + * compatibility on code that exposed QEMU_VERSION to guests in + * the past (and now use qemu_hw_version()). */ struct MachineClass { /*< private >*/ diff --git a/include/hw/bt.h b/include/hw/bt.h index cb2a7e6579..c7c7909a37 100644 --- a/include/hw/bt.h +++ b/include/hw/bt.h @@ -504,7 +504,6 @@ typedef struct { #define OCF_CREATE_CONN_CANCEL 0x0008 typedef struct { - uint8_t status; bdaddr_t bdaddr; } QEMU_PACKED create_conn_cancel_cp; #define CREATE_CONN_CANCEL_CP_SIZE 6 @@ -1266,13 +1265,13 @@ typedef struct { uint8_t status; uint16_t handle; } QEMU_PACKED reset_failed_contact_counter_rp; -#define RESET_FAILED_CONTACT_COUNTER_RP_SIZE 4 +#define RESET_FAILED_CONTACT_COUNTER_RP_SIZE 3 #define OCF_READ_LINK_QUALITY 0x0003 typedef struct { uint16_t handle; } QEMU_PACKED read_link_quality_cp; -#define READ_LINK_QUALITY_CP_SIZE 4 +#define READ_LINK_QUALITY_CP_SIZE 2 typedef struct { uint8_t status; @@ -1332,7 +1331,7 @@ typedef struct { uint8_t dev_class[3]; uint16_t clock_offset; } QEMU_PACKED inquiry_info; -#define INQUIRY_INFO_SIZE 14 +#define INQUIRY_INFO_SIZE 15 #define EVT_CONN_COMPLETE 0x03 typedef struct { @@ -1381,7 +1380,7 @@ typedef struct { uint16_t handle; uint8_t encrypt; } QEMU_PACKED evt_encrypt_change; -#define EVT_ENCRYPT_CHANGE_SIZE 5 +#define EVT_ENCRYPT_CHANGE_SIZE 4 #define EVT_CHANGE_CONN_LINK_KEY_COMPLETE 0x09 typedef struct { @@ -1629,18 +1628,6 @@ typedef struct { } QEMU_PACKED evt_sniff_subrate; #define EVT_SNIFF_SUBRATE_SIZE 11 -#define EVT_EXTENDED_INQUIRY_RESULT 0x2F -typedef struct { - bdaddr_t bdaddr; - uint8_t pscan_rep_mode; - uint8_t pscan_period_mode; - uint8_t dev_class[3]; - uint16_t clock_offset; - int8_t rssi; - uint8_t data[240]; -} QEMU_PACKED extended_inquiry_info; -#define EXTENDED_INQUIRY_INFO_SIZE 254 - #define EVT_TESTING 0xFE #define EVT_VENDOR 0xFF diff --git a/include/hw/compat.h b/include/hw/compat.h index 095de5d12f..d0b1c4f1ef 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -2,7 +2,23 @@ #define HW_COMPAT_H #define HW_COMPAT_2_4 \ - /* empty */ + {\ + .driver = "virtio-blk-device",\ + .property = "scsi",\ + .value = "true",\ + },{\ + .driver = "e1000",\ + .property = "extra_mac_registers",\ + .value = "off",\ + },{\ + .driver = "virtio-pci",\ + .property = "x-disable-pcie",\ + .value = "on",\ + },{\ + .driver = "virtio-pci",\ + .property = "migrate-extra",\ + .value = "off",\ + }, #define HW_COMPAT_2_3 \ {\ diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 7037de044d..854c330b66 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -60,7 +60,6 @@ struct PCMachineClass { /*< public >*/ bool broken_reserved_end; - bool inter_dimm_gap; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); }; @@ -323,8 +322,50 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "host" "-" TYPE_X86_CPU,\ .property = "host-cache-info",\ .value = "on",\ + },\ + {\ + .driver = TYPE_X86_CPU,\ + .property = "check",\ + .value = "off",\ + },\ + {\ + .driver = "qemu64" "-" TYPE_X86_CPU,\ + .property = "sse4a",\ + .value = "on",\ + },\ + {\ + .driver = "qemu64" "-" TYPE_X86_CPU,\ + .property = "abm",\ + .value = "on",\ + },\ + {\ + .driver = "qemu64" "-" TYPE_X86_CPU,\ + .property = "popcnt",\ + .value = "on",\ + },\ + {\ + .driver = "qemu32" "-" TYPE_X86_CPU,\ + .property = "popcnt",\ + .value = "on",\ + },{\ + .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ + .property = "rdtscp",\ + .value = "on",\ + },{\ + .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ + .property = "rdtscp",\ + .value = "on",\ + },{\ + .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ + .property = "rdtscp",\ + .value = "on",\ + },{\ + .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ + .property = "rdtscp",\ + .value = "on",\ }, + #define PC_COMPAT_2_3 \ PC_COMPAT_2_4 \ HW_COMPAT_2_3 \ diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index c1ee7b0408..d83bf30ea9 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -83,16 +83,15 @@ typedef struct MemoryHotplugState { uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, - uint64_t *hint, uint64_t align, bool gap, - uint64_t size, Error **errp); + uint64_t *hint, uint64_t align, uint64_t size, + Error **errp); int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); int qmp_pc_dimm_device_list(Object *obj, void *opaque); uint64_t pc_existing_dimms_capacity(Error **errp); void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, - MemoryRegion *mr, uint64_t align, bool gap, - Error **errp); + MemoryRegion *mr, uint64_t align, Error **errp); void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, MemoryRegion *mr); #endif diff --git a/include/hw/misc/zynq-xadc.h b/include/hw/misc/zynq-xadc.h new file mode 100644 index 0000000000..f1a410a376 --- /dev/null +++ b/include/hw/misc/zynq-xadc.h @@ -0,0 +1,46 @@ +/* + * Device model for Zynq ADC controller + * + * Copyright (c) 2015 Guenter Roeck <linux@roeck-us.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef ZYNQ_XADC_H +#define ZYNQ_XADC_H + +#include "hw/sysbus.h" + +#define ZYNQ_XADC_MMIO_SIZE 0x0020 +#define ZYNQ_XADC_NUM_IO_REGS (ZYNQ_XADC_MMIO_SIZE / 4) +#define ZYNQ_XADC_NUM_ADC_REGS 128 +#define ZYNQ_XADC_FIFO_DEPTH 15 + +#define TYPE_ZYNQ_XADC "xlnx,zynq-xadc" +#define ZYNQ_XADC(obj) \ + OBJECT_CHECK(ZynqXADCState, (obj), TYPE_ZYNQ_XADC) + +typedef struct ZynqXADCState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + uint32_t regs[ZYNQ_XADC_NUM_IO_REGS]; + uint16_t xadc_regs[ZYNQ_XADC_NUM_ADC_REGS]; + uint16_t xadc_read_reg_previous; + uint16_t xadc_dfifo[ZYNQ_XADC_FIFO_DEPTH]; + uint16_t xadc_dfifo_entries; + + struct IRQState *qemu_irq; + +} ZynqXADCState; + +#endif /* ZYNQ_XADC_H */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index f5e7fd818a..379b6e1a45 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -397,6 +397,7 @@ void pci_for_each_bus_depth_first(PCIBus *bus, void *(*begin)(PCIBus *bus, void *parent_state), void (*end)(PCIBus *bus, void *state), void *parent_state); +PCIDevice *pci_get_function_0(PCIDevice *pci_dev); /* Use this wrapper when specific scan order is not required. */ static inline diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 8057aedaa6..c537969f4e 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -237,7 +237,7 @@ struct BusState { struct Property { const char *name; PropertyInfo *info; - int offset; + ptrdiff_t offset; uint8_t bitnr; qtype_code qtype; int64_t defval; @@ -337,6 +337,7 @@ int qdev_walk_children(DeviceState *dev, void *opaque); void qdev_reset_all(DeviceState *dev); +void qdev_reset_all_fn(void *opaque); /** * @qbus_reset_all: diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index cdaf0f8eb7..1915a7342e 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -250,7 +250,6 @@ SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d, SCSIRequest *scsi_req_new(SCSIDevice *d, uint32_t tag, uint32_t lun, uint8_t *buf, void *hba_private); int32_t scsi_req_enqueue(SCSIRequest *req); -void scsi_req_free(SCSIRequest *req); SCSIRequest *scsi_req_ref(SCSIRequest *req); void scsi_req_unref(SCSIRequest *req); diff --git a/include/hw/sd.h b/include/hw/sd/sd.h index 79adb5bb48..79adb5bb48 100644 --- a/include/hw/sd.h +++ b/include/hw/sd/sd.h diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h new file mode 100644 index 0000000000..9b3031fb81 --- /dev/null +++ b/include/hw/sd/sdhci.h @@ -0,0 +1,94 @@ +/* + * SD Association Host Standard Specification v2.0 controller emulation + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * Mitsyanko Igor <i.mitsyanko@samsung.com> + * Peter A.G. Crosthwaite <peter.crosthwaite@petalogix.com> + * + * Based on MMC controller for Samsung S5PC1xx-based board emulation + * by Alexey Merkulov and Vladimir Monakhov. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU _General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef SDHCI_H +#define SDHCI_H + +#include "qemu-common.h" +#include "hw/block/block.h" +#include "hw/pci/pci.h" +#include "hw/sysbus.h" +#include "hw/sd/sd.h" + +/* SD/MMC host controller state */ +typedef struct SDHCIState { + union { + PCIDevice pcidev; + SysBusDevice busdev; + }; + SDState *card; + MemoryRegion iomem; + BlockConf conf; + + QEMUTimer *insert_timer; /* timer for 'changing' sd card. */ + QEMUTimer *transfer_timer; + qemu_irq eject_cb; + qemu_irq ro_cb; + qemu_irq irq; + + uint32_t sdmasysad; /* SDMA System Address register */ + uint16_t blksize; /* Host DMA Buff Boundary and Transfer BlkSize Reg */ + uint16_t blkcnt; /* Blocks count for current transfer */ + uint32_t argument; /* Command Argument Register */ + uint16_t trnmod; /* Transfer Mode Setting Register */ + uint16_t cmdreg; /* Command Register */ + uint32_t rspreg[4]; /* Response Registers 0-3 */ + uint32_t prnsts; /* Present State Register */ + uint8_t hostctl; /* Host Control Register */ + uint8_t pwrcon; /* Power control Register */ + uint8_t blkgap; /* Block Gap Control Register */ + uint8_t wakcon; /* WakeUp Control Register */ + uint16_t clkcon; /* Clock control Register */ + uint8_t timeoutcon; /* Timeout Control Register */ + uint8_t admaerr; /* ADMA Error Status Register */ + uint16_t norintsts; /* Normal Interrupt Status Register */ + uint16_t errintsts; /* Error Interrupt Status Register */ + uint16_t norintstsen; /* Normal Interrupt Status Enable Register */ + uint16_t errintstsen; /* Error Interrupt Status Enable Register */ + uint16_t norintsigen; /* Normal Interrupt Signal Enable Register */ + uint16_t errintsigen; /* Error Interrupt Signal Enable Register */ + uint16_t acmd12errsts; /* Auto CMD12 error status register */ + uint64_t admasysaddr; /* ADMA System Address Register */ + + uint32_t capareg; /* Capabilities Register */ + uint32_t maxcurr; /* Maximum Current Capabilities Register */ + uint8_t *fifo_buffer; /* SD host i/o FIFO buffer */ + uint32_t buf_maxsz; + uint16_t data_count; /* current element in FIFO buffer */ + uint8_t stopped_state;/* Current SDHC state */ + /* Buffer Data Port Register - virtual access point to R and W buffers */ + /* Software Reset Register - always reads as 0 */ + /* Force Event Auto CMD12 Error Interrupt Reg - write only */ + /* Force Event Error Interrupt Register- write only */ + /* RO Host Controller Version Register always reads as 0x2401 */ +} SDHCIState; + +#define TYPE_PCI_SDHCI "sdhci-pci" +#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI) + +#define TYPE_SYSBUS_SDHCI "generic-sdhci" +#define SYSBUS_SDHCI(obj) \ + OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI) + +#endif /* SDHCI_H */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 7437fd476a..b60d7585b4 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -66,7 +66,6 @@ struct vhost_dev { int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type); void vhost_dev_cleanup(struct vhost_dev *hdev); -bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev); int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index 8811415fa6..3f2c1363d0 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -44,9 +44,12 @@ typedef struct VirtioBusClass { void (*notify)(DeviceState *d, uint16_t vector); void (*save_config)(DeviceState *d, QEMUFile *f); void (*save_queue)(DeviceState *d, int n, QEMUFile *f); + void (*save_extra_state)(DeviceState *d, QEMUFile *f); int (*load_config)(DeviceState *d, QEMUFile *f); int (*load_queue)(DeviceState *d, int n, QEMUFile *f); int (*load_done)(DeviceState *d, QEMUFile *f); + int (*load_extra_state)(DeviceState *d, QEMUFile *f); + bool (*has_extra_state)(DeviceState *d); bool (*query_guest_notifiers)(DeviceState *d); int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assign); int (*set_host_notifier)(DeviceState *d, int n, bool assigned); @@ -57,6 +60,11 @@ typedef struct VirtioBusClass { */ void (*device_plugged)(DeviceState *d, Error **errp); /* + * Re-evaluate setup after feature bits have been validated + * by the device backend. + */ + void (*post_plugged)(DeviceState *d, Error **errp); + /* * transport independent exit function. * This is called by virtio-bus just before the device is unplugged. */ diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9d09115fab..205fadf234 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -151,8 +151,7 @@ void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx); -void virtqueue_map_sg(struct iovec *sg, hwaddr *addr, - size_t num_sg, int is_write); +void virtqueue_map(VirtQueueElement *elem); int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem); int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, unsigned int out_bytes); diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index d7fa6a4d01..4ac0c6f443 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -439,4 +439,20 @@ static inline int xen_xc_domain_add_to_physmap(XenXC xch, uint32_t domid, } #endif +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 470 +static inline int xen_domain_create(XenXC xc, uint32_t ssidref, + xen_domain_handle_t handle, uint32_t flags, + uint32_t *pdomid) +{ + return xc_domain_create(xc, ssidref, handle, flags, pdomid); +} +#else +static inline int xen_domain_create(XenXC xc, uint32_t ssidref, + xen_domain_handle_t handle, uint32_t flags, + uint32_t *pdomid) +{ + return xc_domain_create(xc, ssidref, handle, flags, pdomid, NULL); +} +#endif + #endif /* QEMU_HW_XEN_COMMON_H */ diff --git a/include/migration/migration.h b/include/migration/migration.h index 83346210b1..fd018b74a2 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -35,6 +35,7 @@ #define QEMU_VM_SUBSECTION 0x05 #define QEMU_VM_VMDESCRIPTION 0x06 #define QEMU_VM_CONFIGURATION 0x07 +#define QEMU_VM_COMMAND 0x08 #define QEMU_VM_SECTION_FOOTER 0x7e struct MigrationParams { @@ -42,13 +43,67 @@ struct MigrationParams { bool shared; }; -typedef struct MigrationState MigrationState; +/* Messages sent on the return path from destination to source */ +enum mig_rp_message_type { + MIG_RP_MSG_INVALID = 0, /* Must be 0 */ + MIG_RP_MSG_SHUT, /* sibling will not send any more RP messages */ + MIG_RP_MSG_PONG, /* Response to a PING; data (seq: be32 ) */ + + MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */ + MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ + + MIG_RP_MSG_MAX +}; typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head; +/* The current postcopy state is read/set by postcopy_state_get/set + * which update it atomically. + * The state is updated as postcopy messages are received, and + * in general only one thread should be writing to the state at any one + * time, initially the main thread and then the listen thread; + * Corner cases are where either thread finishes early and/or errors. + * The state is checked as messages are received to ensure that + * the source is sending us messages in the correct order. + * The state is also used by the RAM reception code to know if it + * has to place pages atomically, and the cleanup code at the end of + * the main thread to know if it has to delay cleanup until the end + * of postcopy. + */ +typedef enum { + POSTCOPY_INCOMING_NONE = 0, /* Initial state - no postcopy */ + POSTCOPY_INCOMING_ADVISE, + POSTCOPY_INCOMING_DISCARD, + POSTCOPY_INCOMING_LISTENING, + POSTCOPY_INCOMING_RUNNING, + POSTCOPY_INCOMING_END +} PostcopyState; + /* State for the incoming migration */ struct MigrationIncomingState { - QEMUFile *file; + QEMUFile *from_src_file; + + /* + * Free at the start of the main state load, set as the main thread finishes + * loading state. + */ + QemuEvent main_thread_load_event; + + bool have_fault_thread; + QemuThread fault_thread; + QemuSemaphore fault_thread_sem; + + bool have_listen_thread; + QemuThread listen_thread; + QemuSemaphore listen_thread_sem; + + /* For the kernel to send us notifications */ + int userfault_fd; + /* To tell the fault_thread to quit */ + int userfault_quit_fd; + QEMUFile *to_src_file; + QemuMutex rp_mutex; /* We send replies from multiple threads */ + void *postcopy_tmp_page; /* See savevm.c */ LoadStateEntry_Head loadvm_handlers; @@ -58,6 +113,18 @@ MigrationIncomingState *migration_incoming_get_current(void); MigrationIncomingState *migration_incoming_state_new(QEMUFile *f); void migration_incoming_state_destroy(void); +/* + * An outstanding page request, on the source, having been received + * and queued + */ +struct MigrationSrcPageRequest { + RAMBlock *rb; + hwaddr offset; + hwaddr len; + + QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req; +}; + struct MigrationState { int64_t bandwidth_limit; @@ -70,6 +137,14 @@ struct MigrationState int state; MigrationParams params; + + /* State related to return path */ + struct { + QEMUFile *from_dst_file; + QemuThread rp_thread; + bool error; + } rp_state; + double mbps; int64_t total_time; int64_t downtime; @@ -80,6 +155,18 @@ struct MigrationState int64_t xbzrle_cache_size; int64_t setup_time; int64_t dirty_sync_count; + + /* Flag set once the migration has been asked to enter postcopy */ + bool start_postcopy; + + /* Flag set once the migration thread is running (and needs joining) */ + bool migration_thread_running; + + /* Queue of outstanding page requests from the destination */ + QemuMutex src_page_req_mutex; + QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests; + /* The RAMBlock used in the last src_page_request */ + RAMBlock *last_req_rb; }; void process_incoming_migration(QEMUFile *f); @@ -116,9 +203,12 @@ int migrate_fd_close(MigrationState *s); void add_migration_state_change_notifier(Notifier *notify); void remove_migration_state_change_notifier(Notifier *notify); +MigrationState *migrate_init(const MigrationParams *params); bool migration_in_setup(MigrationState *); bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); +/* True if outgoing migration has entered postcopy phase */ +bool migration_in_postcopy(MigrationState *); MigrationState *migrate_get_current(void); void migrate_compress_threads_create(void); @@ -145,6 +235,13 @@ uint64_t xbzrle_mig_pages_cache_miss(void); double xbzrle_mig_cache_miss_rate(void); void ram_handle_compressed(void *host, uint8_t ch, uint64_t size); +void ram_debug_dump_bitmap(unsigned long *todump, bool expected); +/* For outgoing discard bitmap */ +int ram_postcopy_send_discard_bitmap(MigrationState *ms); +/* For incoming postcopy discard */ +int ram_discard_range(MigrationIncomingState *mis, const char *block_name, + uint64_t start, size_t length); +int ram_postcopy_incoming_init(MigrationIncomingState *mis); /** * @migrate_add_blocker - prevent migration from proceeding @@ -160,6 +257,7 @@ void migrate_add_blocker(Error *reason); */ void migrate_del_blocker(Error *reason); +bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); bool migrate_auto_converge(void); @@ -179,6 +277,17 @@ int migrate_compress_threads(void); int migrate_decompress_threads(void); bool migrate_use_events(void); +/* Sending on the return path - generic and then for each message type */ +void migrate_send_rp_message(MigrationIncomingState *mis, + enum mig_rp_message_type message_type, + uint16_t len, void *data); +void migrate_send_rp_shut(MigrationIncomingState *mis, + uint32_t value); +void migrate_send_rp_pong(MigrationIncomingState *mis, + uint32_t value); +void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname, + ram_addr_t start, size_t len); + void ram_control_before_iterate(QEMUFile *f, uint64_t flags); void ram_control_after_iterate(QEMUFile *f, uint64_t flags); void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); @@ -204,4 +313,12 @@ void global_state_set_optional(void); void savevm_skip_configuration(void); int global_state_store(void); void global_state_store_running(void); + +void flush_page_queue(MigrationState *ms); +int ram_save_queue_pages(MigrationState *ms, const char *rbname, + ram_addr_t start, ram_addr_t len); + +PostcopyState postcopy_state_get(void); +/* Set the state and return the old state */ +PostcopyState postcopy_state_set(PostcopyState new_state); #endif diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h new file mode 100644 index 0000000000..b6a7491f2d --- /dev/null +++ b/include/migration/postcopy-ram.h @@ -0,0 +1,99 @@ +/* + * Postcopy migration for RAM + * + * Copyright 2013 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Dave Gilbert <dgilbert@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#ifndef QEMU_POSTCOPY_RAM_H +#define QEMU_POSTCOPY_RAM_H + +/* Return true if the host supports everything we need to do postcopy-ram */ +bool postcopy_ram_supported_by_host(void); + +/* + * Make all of RAM sensitive to accesses to areas that haven't yet been written + * and wire up anything necessary to deal with it. + */ +int postcopy_ram_enable_notify(MigrationIncomingState *mis); + +/* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * called from ram.c's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis); + +/* + * Discard the contents of 'length' bytes from 'start' + * We can assume that if we've been called postcopy_ram_hosttest returned true + */ +int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, + size_t length); + +/* + * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard + * however leaving it until after precopy means that most of the precopy + * data is still THPd + */ +int postcopy_ram_prepare_discard(MigrationIncomingState *mis); + +/* + * Called at the start of each RAMBlock by the bitmap code. + * 'offset' is the bitmap offset of the named RAMBlock in the migration + * bitmap. + * Returns a new PDS + */ +PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, + unsigned long offset, + const char *name); + +/* + * Called by the bitmap code for each chunk to discard. + * May send a discard message, may just leave it queued to + * be sent later. + * @start,@length: a range of pages in the migration bitmap in the + * RAM block passed to postcopy_discard_send_init() (length=1 is one page) + */ +void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, + unsigned long start, unsigned long length); + +/* + * Called at the end of each RAMBlock by the bitmap code. + * Sends any outstanding discard messages, frees the PDS. + */ +void postcopy_discard_send_finish(MigrationState *ms, + PostcopyDiscardState *pds); + +/* + * Place a page (from) at (host) efficiently + * There are restrictions on how 'from' must be mapped, in general best + * to use other postcopy_ routines to allocate. + * returns 0 on success + */ +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from); + +/* + * Place a zero page at (host) atomically + * returns 0 on success + */ +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host); + +/* + * Allocate a page of memory that can be mapped at a later point in time + * using postcopy_place_page + * Returns: Pointer to allocated page + */ +void *postcopy_get_tmp_page(MigrationIncomingState *mis); + +#endif diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 29a338d0a9..b5d08d217d 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -89,6 +89,11 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque, uint64_t *bytes_sent); /* + * Return a QEMUFile for comms in the opposite direction + */ +typedef QEMUFile *(QEMURetPathFunc)(void *opaque); + +/* * Stop any read or write (depending on flags) on the underlying * transport on the QEMUFile. * Existing blocking reads/writes must be woken @@ -106,6 +111,7 @@ typedef struct QEMUFileOps { QEMURamHookFunc *after_ram_iterate; QEMURamHookFunc *hook_ram_load; QEMURamSaveFunc *save_page; + QEMURetPathFunc *get_return_path; QEMUFileShutdownFunc *shut_down; } QEMUFileOps; @@ -163,9 +169,11 @@ void qemu_put_be32(QEMUFile *f, unsigned int v); void qemu_put_be64(QEMUFile *f, uint64_t v); size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); +size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size, int level); int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); + /* * Note that you can only peek continuous bytes from where the current pointer * is; you aren't guaranteed to be able to peak to +n bytes unless you've @@ -194,7 +202,9 @@ int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error(QEMUFile *f); void qemu_file_set_error(QEMUFile *f, int ret); int qemu_file_shutdown(QEMUFile *f); +QEMUFile *qemu_file_get_return_path(QEMUFile *f); void qemu_fflush(QEMUFile *f); +void qemu_file_set_blocking(QEMUFile *f, bool block); static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) { diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 9a65522da1..7267e38c1f 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -39,8 +39,9 @@ typedef struct SaveVMHandlers { void (*set_params)(const MigrationParams *params, void * opaque); SaveStateHandler *save_state; - void (*cancel)(void *opaque); - int (*save_live_complete)(QEMUFile *f, void *opaque); + void (*cleanup)(void *opaque); + int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); + int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); /* This runs both outside and inside the iothread lock. */ bool (*is_active)(void *opaque); @@ -54,8 +55,9 @@ typedef struct SaveVMHandlers { /* This runs outside the iothread lock! */ int (*save_live_setup)(QEMUFile *f, void *opaque); - uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size); - + void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size, + uint64_t *non_postcopiable_pending, + uint64_t *postcopiable_pending); LoadStateHandler *load_state; } SaveVMHandlers; diff --git a/include/monitor/hmp-target.h b/include/monitor/hmp-target.h index 213566c612..bc2c9c04d0 100644 --- a/include/monitor/hmp-target.h +++ b/include/monitor/hmp-target.h @@ -35,6 +35,7 @@ struct MonitorDef { }; const MonitorDef *target_monitor_defs(void); +int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval); CPUArchState *mon_get_cpu_env(void); CPUState *mon_get_cpu(void); diff --git a/include/qapi/error.h b/include/qapi/error.h index c69dddbbf2..4d42cdc5fd 100644 --- a/include/qapi/error.h +++ b/include/qapi/error.h @@ -30,6 +30,10 @@ * Handle an error without reporting it (just for completeness): * error_free(err); * + * Assert that an expected error occurred, but clean it up without + * reporting it (primarily useful in testsuites): + * error_free_or_abort(&err); + * * Pass an existing error to the caller: * error_propagate(errp, err); * where Error **errp is a parameter, by convention the last one. @@ -190,6 +194,11 @@ Error *error_copy(const Error *err); void error_free(Error *err); /* + * Convenience function to assert that *@errp is set, then silently free it. + */ +void error_free_or_abort(Error **errp); + +/* * Convenience function to error_report() and free @err. */ void error_report_err(Error *); diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h index cdff0460a8..cb456d53e5 100644 --- a/include/qapi/qmp/json-lexer.h +++ b/include/qapi/qmp/json-lexer.h @@ -14,11 +14,16 @@ #ifndef QEMU_JSON_LEXER_H #define QEMU_JSON_LEXER_H -#include "qapi/qmp/qstring.h" -#include "qapi/qmp/qlist.h" +#include "glib-compat.h" typedef enum json_token_type { - JSON_OPERATOR = 100, + JSON_MIN = 100, + JSON_LCURLY = JSON_MIN, + JSON_RCURLY, + JSON_LSQUARE, + JSON_RSQUARE, + JSON_COLON, + JSON_COMMA, JSON_INTEGER, JSON_FLOAT, JSON_KEYWORD, @@ -30,13 +35,14 @@ typedef enum json_token_type { typedef struct JSONLexer JSONLexer; -typedef void (JSONLexerEmitter)(JSONLexer *, QString *, JSONTokenType, int x, int y); +typedef void (JSONLexerEmitter)(JSONLexer *, GString *, + JSONTokenType, int x, int y); struct JSONLexer { JSONLexerEmitter *emit; int state; - QString *token; + GString *token; int x, y; }; diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h index 44d88f3468..fea89f873a 100644 --- a/include/qapi/qmp/json-parser.h +++ b/include/qapi/qmp/json-parser.h @@ -18,7 +18,7 @@ #include "qapi/qmp/qlist.h" #include "qapi/error.h" -QObject *json_parser_parse(QList *tokens, va_list *ap); -QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp); +QObject *json_parser_parse(GQueue *tokens, va_list *ap); +QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp); #endif diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h index 823f7d7fa4..09b3d3ec15 100644 --- a/include/qapi/qmp/json-streamer.h +++ b/include/qapi/qmp/json-streamer.h @@ -14,21 +14,29 @@ #ifndef QEMU_JSON_STREAMER_H #define QEMU_JSON_STREAMER_H -#include "qapi/qmp/qlist.h" +#include <stdint.h> +#include "glib-compat.h" #include "qapi/qmp/json-lexer.h" +typedef struct JSONToken { + int type; + int x; + int y; + char str[]; +} JSONToken; + typedef struct JSONMessageParser { - void (*emit)(struct JSONMessageParser *parser, QList *tokens); + void (*emit)(struct JSONMessageParser *parser, GQueue *tokens); JSONLexer lexer; int brace_count; int bracket_count; - QList *tokens; + GQueue *tokens; uint64_t token_size; } JSONMessageParser; void json_message_parser_init(JSONMessageParser *parser, - void (*func)(JSONMessageParser *, QList *)); + void (*func)(JSONMessageParser *, GQueue *)); int json_message_parser_feed(JSONMessageParser *parser, const char *buffer, size_t size); diff --git a/include/qapi/qmp/qbool.h b/include/qapi/qmp/qbool.h index 4aa6be3b33..d9256e4268 100644 --- a/include/qapi/qmp/qbool.h +++ b/include/qapi/qmp/qbool.h @@ -18,7 +18,7 @@ #include "qapi/qmp/qobject.h" typedef struct QBool { - QObject_HEAD; + QObject base; bool value; } QBool; diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index a37f4c1567..787c658967 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -28,7 +28,7 @@ typedef struct QDictEntry { } QDictEntry; typedef struct QDict { - QObject_HEAD; + QObject base; size_t size; QLIST_HEAD(,QDictEntry) table[QDICT_BUCKET_MAX]; } QDict; diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h index 842b27ae11..f60149978a 100644 --- a/include/qapi/qmp/qerror.h +++ b/include/qapi/qmp/qerror.h @@ -106,4 +106,7 @@ #define QERR_UNSUPPORTED \ "this feature or command is not currently supported" +#define QERR_REPLAY_NOT_SUPPORTED \ + "Record/replay feature is not supported for '%s'" + #endif /* QERROR_H */ diff --git a/include/qapi/qmp/qfloat.h b/include/qapi/qmp/qfloat.h index a8658443dc..46745e50d1 100644 --- a/include/qapi/qmp/qfloat.h +++ b/include/qapi/qmp/qfloat.h @@ -18,7 +18,7 @@ #include "qapi/qmp/qobject.h" typedef struct QFloat { - QObject_HEAD; + QObject base; double value; } QFloat; diff --git a/include/qapi/qmp/qint.h b/include/qapi/qmp/qint.h index 48a41b0f2a..339a9abb8f 100644 --- a/include/qapi/qmp/qint.h +++ b/include/qapi/qmp/qint.h @@ -17,7 +17,7 @@ #include "qapi/qmp/qobject.h" typedef struct QInt { - QObject_HEAD; + QObject base; int64_t value; } QInt; diff --git a/include/qapi/qmp/qlist.h b/include/qapi/qmp/qlist.h index 6cc4831df3..b1bf7852c5 100644 --- a/include/qapi/qmp/qlist.h +++ b/include/qapi/qmp/qlist.h @@ -22,7 +22,7 @@ typedef struct QListEntry { } QListEntry; typedef struct QList { - QObject_HEAD; + QObject base; QTAILQ_HEAD(,QListEntry) head; } QList; diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h index 260d2ed3cc..4b96ed5837 100644 --- a/include/qapi/qmp/qobject.h +++ b/include/qapi/qmp/qobject.h @@ -59,10 +59,6 @@ typedef struct QObject { size_t refcnt; } QObject; -/* Objects definitions must include this */ -#define QObject_HEAD \ - QObject base - /* Get the 'base' part of an object */ #define QOBJECT(obj) (&(obj)->base) @@ -94,6 +90,7 @@ static inline void qobject_incref(QObject *obj) */ static inline void qobject_decref(QObject *obj) { + assert(!obj || obj->refcnt); if (obj && --obj->refcnt == 0) { assert(obj->type != NULL); assert(obj->type->destroy != NULL); diff --git a/include/qapi/qmp/qstring.h b/include/qapi/qmp/qstring.h index 1bc3666107..34675a7fc0 100644 --- a/include/qapi/qmp/qstring.h +++ b/include/qapi/qmp/qstring.h @@ -17,7 +17,7 @@ #include "qapi/qmp/qobject.h" typedef struct QString { - QObject_HEAD; + QObject base; char *string; size_t length; size_t capacity; diff --git a/include/qemu-common.h b/include/qemu-common.h index 2f74540a87..405364f2b9 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -499,5 +499,6 @@ size_t buffer_find_nonzero_offset(const void *buf, size_t len); int parse_debug_env(const char *name, int max, int initial); const char *qemu_ether_ntoa(const MACAddr *mac); +void page_size_init(void); #endif diff --git a/include/qemu/buffer.h b/include/qemu/buffer.h index b380cec6fa..dead9b77e1 100644 --- a/include/qemu/buffer.h +++ b/include/qemu/buffer.h @@ -34,12 +34,35 @@ typedef struct Buffer Buffer; */ struct Buffer { + char *name; size_t capacity; size_t offset; + uint64_t avg_size; uint8_t *buffer; }; /** + * buffer_init: + * @buffer: the buffer object + * @name: buffer name + * + * Optionally attach a name to the buffer, to make it easier + * to identify in debug traces. + */ +void buffer_init(Buffer *buffer, const char *name, ...) + GCC_FMT_ATTR(2, 3); + +/** + * buffer_shrink: + * @buffer: the buffer object + * + * Try to shrink the buffer. Checks current buffer capacity and size + * and reduces capacity in case only a fraction of the buffer is + * actually used. + */ +void buffer_shrink(Buffer *buffer); + +/** * buffer_reserve: * @buffer: the buffer object * @len: the minimum required free space @@ -115,4 +138,24 @@ uint8_t *buffer_end(Buffer *buffer); */ gboolean buffer_empty(Buffer *buffer); +/** + * buffer_move_empty: + * @to: destination buffer object + * @from: source buffer object + * + * Moves buffer, without copying data. 'to' buffer must be empty. + * 'from' buffer is empty and zero-sized on return. + */ +void buffer_move_empty(Buffer *to, Buffer *from); + +/** + * buffer_move: + * @to: destination buffer object + * @from: source buffer object + * + * Moves buffer, copying data (unless 'to' buffer happens to be empty). + * 'from' buffer is empty and zero-sized on return. + */ +void buffer_move(Buffer *to, Buffer *from); + #endif /* QEMU_BUFFER_H__ */ diff --git a/include/qemu/log.h b/include/qemu/log.h index 7de45001f2..362cbc4e67 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -35,7 +35,6 @@ static inline bool qemu_log_enabled(void) #define CPU_LOG_INT (1 << 4) #define CPU_LOG_EXEC (1 << 5) #define CPU_LOG_PCALL (1 << 6) -#define CPU_LOG_IOPORT (1 << 7) #define CPU_LOG_TB_CPU (1 << 8) #define CPU_LOG_RESET (1 << 9) #define LOG_UNIMP (1 << 10) diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h index 56388e689b..0899b2f01e 100644 --- a/include/qemu/mmap-alloc.h +++ b/include/qemu/mmap-alloc.h @@ -3,6 +3,8 @@ #include "qemu-common.h" +size_t qemu_fd_getpagesize(int fd); + void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); void qemu_ram_munmap(void *ptr, size_t size); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index b56842420e..84e84ac700 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -139,6 +139,8 @@ void qemu_anon_ram_free(void *ptr, size_t size); #if defined(CONFIG_MADVISE) +#include <sys/mman.h> + #define QEMU_MADV_WILLNEED MADV_WILLNEED #define QEMU_MADV_DONTNEED MADV_DONTNEED #ifdef MADV_DONTFORK @@ -171,6 +173,11 @@ void qemu_anon_ram_free(void *ptr, size_t size); #else #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID #endif +#ifdef MADV_NOHUGEPAGE +#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE +#else +#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID +#endif #elif defined(CONFIG_POSIX_MADVISE) @@ -182,6 +189,7 @@ void qemu_anon_ram_free(void *ptr, size_t size); #define QEMU_MADV_DODUMP QEMU_MADV_INVALID #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID +#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID #else /* no-op */ @@ -193,6 +201,7 @@ void qemu_anon_ram_free(void *ptr, size_t size); #define QEMU_MADV_DODUMP QEMU_MADV_INVALID #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID +#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID #endif @@ -247,8 +256,12 @@ static inline void qemu_timersub(const struct timeval *val1, void qemu_set_cloexec(int fd); -void qemu_set_version(const char *); -const char *qemu_get_version(void); +/* QEMU "hardware version" setting. Used to replace code that exposed + * QEMU_VERSION to guests in the past and need to keep compatibilty. + * Do not use qemu_hw_version() in new code. + */ +void qemu_set_hw_version(const char *); +const char *qemu_hw_version(void); void fips_set_state(bool requested); bool fips_get_state(void); diff --git a/include/qemu/timed-average.h b/include/qemu/timed-average.h new file mode 100644 index 0000000000..364bf88f70 --- /dev/null +++ b/include/qemu/timed-average.h @@ -0,0 +1,64 @@ +/* + * QEMU timed average computation + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef TIMED_AVERAGE_H +#define TIMED_AVERAGE_H + +#include <stdint.h> + +#include "qemu/timer.h" + +typedef struct TimedAverageWindow TimedAverageWindow; +typedef struct TimedAverage TimedAverage; + +/* All fields of both structures are private */ + +struct TimedAverageWindow { + uint64_t min; /* minimum value accounted in the window */ + uint64_t max; /* maximum value accounted in the window */ + uint64_t sum; /* sum of all values */ + uint64_t count; /* number of values */ + int64_t expiration; /* the end of the current window in ns */ +}; + +struct TimedAverage { + uint64_t period; /* period in nanoseconds */ + TimedAverageWindow windows[2]; /* two overlapping windows of with + * an offset of period / 2 between them */ + unsigned current; /* the current window index: it's also the + * oldest window index */ + QEMUClockType clock_type; /* the clock used */ +}; + +void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, + uint64_t period); + +void timed_average_account(TimedAverage *ta, uint64_t value); + +uint64_t timed_average_min(TimedAverage *ta); +uint64_t timed_average_avg(TimedAverage *ta); +uint64_t timed_average_max(TimedAverage *ta); +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed); + +#endif diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index d961362d36..3eedcf4c8f 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -3,12 +3,12 @@ /* A load of opaque types so that device init declarations don't have to pull in all the real definitions. */ -struct Monitor; /* Please keep this list in alphabetical order */ typedef struct AdapterInfo AdapterInfo; typedef struct AddressSpace AddressSpace; typedef struct AioContext AioContext; +typedef struct AllwinnerAHCIState AllwinnerAHCIState; typedef struct AudioState AudioState; typedef struct BlockBackend BlockBackend; typedef struct BlockBackendRootState BlockBackendRootState; @@ -18,8 +18,8 @@ typedef struct BusState BusState; typedef struct CharDriverState CharDriverState; typedef struct CompatProperty CompatProperty; typedef struct CPUAddressSpace CPUAddressSpace; -typedef struct DeviceState DeviceState; typedef struct DeviceListener DeviceListener; +typedef struct DeviceState DeviceState; typedef struct DisplayChangeListener DisplayChangeListener; typedef struct DisplayState DisplayState; typedef struct DisplaySurface DisplaySurface; @@ -43,7 +43,9 @@ typedef struct MemoryRegion MemoryRegion; typedef struct MemoryRegionSection MemoryRegionSection; typedef struct MigrationIncomingState MigrationIncomingState; typedef struct MigrationParams MigrationParams; +typedef struct MigrationState MigrationState; typedef struct Monitor Monitor; +typedef struct MonitorDef MonitorDef; typedef struct MouseTransformInfo MouseTransformInfo; typedef struct MSIMessage MSIMessage; typedef struct NetClientState NetClientState; @@ -61,12 +63,13 @@ typedef struct PCIESlot PCIESlot; typedef struct PCIExpressDevice PCIExpressDevice; typedef struct PCIExpressHost PCIExpressHost; typedef struct PCIHostState PCIHostState; -typedef struct PCMachineState PCMachineState; typedef struct PCMachineClass PCMachineClass; +typedef struct PCMachineState PCMachineState; typedef struct PCMCIACardState PCMCIACardState; typedef struct PixelFormat PixelFormat; -typedef struct PropertyInfo PropertyInfo; +typedef struct PostcopyDiscardState PostcopyDiscardState; typedef struct Property Property; +typedef struct PropertyInfo PropertyInfo; typedef struct QEMUBH QEMUBH; typedef struct QemuConsole QemuConsole; typedef struct QEMUFile QEMUFile; @@ -75,8 +78,9 @@ typedef struct QemuOpts QemuOpts; typedef struct QemuOptsList QemuOptsList; typedef struct QEMUSGList QEMUSGList; typedef struct QEMUSizedBuffer QEMUSizedBuffer; -typedef struct QEMUTimerListGroup QEMUTimerListGroup; typedef struct QEMUTimer QEMUTimer; +typedef struct QEMUTimerListGroup QEMUTimerListGroup; +typedef struct RAMBlock RAMBlock; typedef struct Range Range; typedef struct SerialState SerialState; typedef struct SHPCDevice SHPCDevice; @@ -85,6 +89,5 @@ typedef struct SSIBus SSIBus; typedef struct uWireSlave uWireSlave; typedef struct VirtIODevice VirtIODevice; typedef struct Visitor Visitor; -typedef struct MonitorDef MonitorDef; #endif /* QEMU_TYPEDEFS_H */ diff --git a/include/qom/object.h b/include/qom/object.h index be7280c862..4509166f6f 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -344,8 +344,6 @@ typedef struct ObjectProperty ObjectPropertyResolve *resolve; ObjectPropertyRelease *release; void *opaque; - - QTAILQ_ENTRY(ObjectProperty) node; } ObjectProperty; /** @@ -396,16 +394,13 @@ struct ObjectClass * As a result, #Object contains a reference to the objects type as its * first member. This allows identification of the real type of the object at * run time. - * - * #Object also contains a list of #Interfaces that this object - * implements. */ struct Object { /*< private >*/ ObjectClass *class; ObjectFree *free; - QTAILQ_HEAD(, ObjectProperty) properties; + GHashTable *properties; uint32_t ref; Object *parent; }; @@ -510,16 +505,16 @@ struct TypeInfo /** * OBJECT_CLASS_CHECK: - * @class: The C type to use for the return value. - * @obj: A derivative of @type to cast. - * @name: the QOM typename of @class. + * @class_type: The C type to use for the return value. + * @class: A derivative class of @class_type to cast. + * @name: the QOM typename of @class_type. * * A type safe version of @object_class_dynamic_cast_assert. This macro is * typically wrapped by each type to perform type safe casts of a class to a * specific class type. */ -#define OBJECT_CLASS_CHECK(class, obj, name) \ - ((class *)object_class_dynamic_cast_assert(OBJECT_CLASS(obj), (name), \ +#define OBJECT_CLASS_CHECK(class_type, class, name) \ + ((class_type *)object_class_dynamic_cast_assert(OBJECT_CLASS(class), (name), \ __FILE__, __LINE__, __func__)) /** @@ -960,6 +955,55 @@ void object_property_del(Object *obj, const char *name, Error **errp); ObjectProperty *object_property_find(Object *obj, const char *name, Error **errp); +typedef struct ObjectPropertyIterator ObjectPropertyIterator; + +/** + * object_property_iter_init: + * @obj: the object + * + * Initializes an iterator for traversing all properties + * registered against an object instance. + * + * It is forbidden to modify the property list while iterating, + * whether removing or adding properties. + * + * Typical usage pattern would be + * + * <example> + * <title>Using object property iterators</title> + * <programlisting> + * ObjectProperty *prop; + * ObjectPropertyIterator *iter; + * + * iter = object_property_iter_init(obj); + * while ((prop = object_property_iter_next(iter))) { + * ... do something with prop ... + * } + * object_property_iter_free(iter); + * </programlisting> + * </example> + * + * Returns: the new iterator + */ +ObjectPropertyIterator *object_property_iter_init(Object *obj); + +/** + * object_property_iter_free: + * @iter: the iterator instance + * + * Releases any resources associated with the iterator. + */ +void object_property_iter_free(ObjectPropertyIterator *iter); + +/** + * object_property_iter_next: + * @iter: the iterator instance + * + * Returns: the next property, or %NULL when all properties + * have been traversed. + */ +ObjectProperty *object_property_iter_next(ObjectPropertyIterator *iter); + void object_unparent(Object *obj); /** @@ -1488,6 +1532,9 @@ void object_property_set_description(Object *obj, const char *name, * Call @fn passing each child of @obj and @opaque to it, until @fn returns * non-zero. * + * It is forbidden to add or remove children from @obj from the @fn + * callback. + * * Returns: The last value returned by @fn, or 0 if there is no child. */ int object_child_foreach(Object *obj, int (*fn)(Object *child, void *opaque), @@ -1503,6 +1550,9 @@ int object_child_foreach(Object *obj, int (*fn)(Object *child, void *opaque), * non-zero. Calls recursively, all child nodes of @obj will also be passed * all the way down to the leaf nodes of the tree. Depth first ordering. * + * It is forbidden to add or remove children from @obj (or its + * child nodes) from the @fn callback. + * * Returns: The last value returned by @fn, or 0 if there is no child. */ int object_child_foreach_recursive(Object *obj, diff --git a/include/sysemu/balloon.h b/include/sysemu/balloon.h index 17fe30070d..3f976b49e7 100644 --- a/include/sysemu/balloon.h +++ b/include/sysemu/balloon.h @@ -22,5 +22,7 @@ typedef void (QEMUBalloonStatus)(void *opaque, BalloonInfo *info); int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, QEMUBalloonStatus *stat_func, void *opaque); void qemu_remove_balloon_handler(void *opaque); +bool qemu_balloon_is_inhibited(void); +void qemu_balloon_inhibit(bool state); #endif diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 9306a527bb..fb068ea47b 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -65,6 +65,7 @@ BlockBackend *blk_new_with_bs(const char *name, Error **errp); BlockBackend *blk_new_open(const char *name, const char *filename, const char *reference, QDict *options, int flags, Error **errp); +int blk_get_refcnt(BlockBackend *blk); void blk_ref(BlockBackend *blk); void blk_unref(BlockBackend *blk); const char *blk_name(BlockBackend *blk); @@ -72,6 +73,7 @@ BlockBackend *blk_by_name(const char *name); BlockBackend *blk_next(BlockBackend *blk); BlockDriverState *blk_bs(BlockBackend *blk); +void blk_remove_bs(BlockBackend *blk); void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk); @@ -166,6 +168,8 @@ void blk_io_unplug(BlockBackend *blk); BlockAcctStats *blk_get_stats(BlockBackend *blk); BlockBackendRootState *blk_get_root_state(BlockBackend *blk); void blk_update_root_state(BlockBackend *blk); +void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs); +int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); @@ -180,5 +184,8 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret); #endif diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h index a00be94895..b06a0607a9 100644 --- a/include/sysemu/blockdev.h +++ b/include/sysemu/blockdev.h @@ -63,8 +63,6 @@ DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type); /* device-hotplug */ -void qmp_change_blockdev(const char *device, const char *filename, - const char *format, Error **errp); void hmp_commit(Monitor *mon, const QDict *qdict); void hmp_drive_del(Monitor *mon, const QDict *qdict); #endif diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 30ddd1220e..3d1e5ba1e1 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -11,7 +11,6 @@ void cpu_stop_current(void); void cpu_synchronize_all_states(void); void cpu_synchronize_all_post_reset(void); void cpu_synchronize_all_post_init(void); -void cpu_clean_all_dirty(void); void qtest_clock_warp(int64_t dest); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 461ef65dea..b31f325fa2 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -53,6 +53,7 @@ extern bool kvm_gsi_routing_allowed; extern bool kvm_gsi_direct_mapping; extern bool kvm_readonly_mem_allowed; extern bool kvm_direct_msi_allowed; +extern bool kvm_ioeventfd_any_length_allowed; #if defined CONFIG_KVM || !defined NEED_CPU_H #define kvm_enabled() (kvm_allowed) @@ -153,6 +154,12 @@ extern bool kvm_direct_msi_allowed; */ #define kvm_direct_msi_enabled() (kvm_direct_msi_allowed) +/** + * kvm_ioeventfd_any_length_enabled: + * Returns: true if KVM allows any length io eventfd. + */ +#define kvm_ioeventfd_any_length_enabled() (kvm_ioeventfd_any_length_allowed) + #else #define kvm_enabled() (0) #define kvm_irqchip_in_kernel() (false) @@ -166,6 +173,7 @@ extern bool kvm_direct_msi_allowed; #define kvm_gsi_direct_mapping() (false) #define kvm_readonly_mem_enabled() (false) #define kvm_direct_msi_enabled() (false) +#define kvm_ioeventfd_any_length_enabled() (false) #endif struct kvm_run; @@ -417,7 +425,6 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, void kvm_cpu_synchronize_state(CPUState *cpu); void kvm_cpu_synchronize_post_reset(CPUState *cpu); void kvm_cpu_synchronize_post_init(CPUState *cpu); -void kvm_cpu_clean_state(CPUState *cpu); /* generic hooks - to be moved/refactored once there are more users */ @@ -442,13 +449,6 @@ static inline void cpu_synchronize_post_init(CPUState *cpu) } } -static inline void cpu_clean_state(CPUState *cpu) -{ - if (kvm_enabled()) { - kvm_cpu_clean_state(cpu); - } -} - int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev); int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, PCIDevice *dev); diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index 13dcef6b4c..400e098607 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -87,7 +87,7 @@ static inline void os_setup_post(void) {} void os_set_line_buffering(void); static inline void os_set_proc_name(const char *dummy) {} -size_t getpagesize(void); +int getpagesize(void); #if !defined(EPROTONOSUPPORT) # define EPROTONOSUPPORT EINVAL diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h new file mode 100644 index 0000000000..abb4688200 --- /dev/null +++ b/include/sysemu/replay.h @@ -0,0 +1,120 @@ +#ifndef REPLAY_H +#define REPLAY_H + +/* + * replay.h + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include <stdbool.h> +#include <stdint.h> +#include "qapi-types.h" +#include "qapi/error.h" +#include "qemu/typedefs.h" + +/* replay clock kinds */ +enum ReplayClockKind { + /* host_clock */ + REPLAY_CLOCK_HOST, + /* virtual_rt_clock */ + REPLAY_CLOCK_VIRTUAL_RT, + REPLAY_CLOCK_COUNT +}; +typedef enum ReplayClockKind ReplayClockKind; + +/* IDs of the checkpoints */ +enum ReplayCheckpoint { + CHECKPOINT_CLOCK_WARP, + CHECKPOINT_RESET_REQUESTED, + CHECKPOINT_SUSPEND_REQUESTED, + CHECKPOINT_CLOCK_VIRTUAL, + CHECKPOINT_CLOCK_HOST, + CHECKPOINT_CLOCK_VIRTUAL_RT, + CHECKPOINT_INIT, + CHECKPOINT_RESET, + CHECKPOINT_COUNT +}; +typedef enum ReplayCheckpoint ReplayCheckpoint; + +extern ReplayMode replay_mode; + +/* Replay process control functions */ + +/*! Enables recording or saving event log with specified parameters */ +void replay_configure(struct QemuOpts *opts); +/*! Initializes timers used for snapshotting and enables events recording */ +void replay_start(void); +/*! Closes replay log file and frees other resources. */ +void replay_finish(void); +/*! Adds replay blocker with the specified error description */ +void replay_add_blocker(Error *reason); + +/* Processing the instructions */ + +/*! Returns number of executed instructions. */ +uint64_t replay_get_current_step(void); +/*! Returns number of instructions to execute in replay mode. */ +int replay_get_instructions(void); +/*! Updates instructions counter in replay mode. */ +void replay_account_executed_instructions(void); + +/* Interrupts and exceptions */ + +/*! Called by exception handler to write or read + exception processing events. */ +bool replay_exception(void); +/*! Used to determine that exception is pending. + Does not proceed to the next event in the log. */ +bool replay_has_exception(void); +/*! Called by interrupt handlers to write or read + interrupt processing events. + \return true if interrupt should be processed */ +bool replay_interrupt(void); +/*! Tries to read interrupt event from the file. + Returns true, when interrupt request is pending */ +bool replay_has_interrupt(void); + +/* Processing clocks and other time sources */ + +/*! Save the specified clock */ +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock); +/*! Read the specified clock from the log or return cached data */ +int64_t replay_read_clock(ReplayClockKind kind); +/*! Saves or reads the clock depending on the current replay mode. */ +#define REPLAY_CLOCK(clock, value) \ + (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value)) \ + : (value)) + +/* Events */ + +/*! Called when qemu shutdown is requested. */ +void replay_shutdown_request(void); +/*! Should be called at check points in the execution. + These check points are skipped, if they were not met. + Saves checkpoint in the SAVE mode and validates in the PLAY mode. + Returns 0 in PLAY mode if checkpoint was not found. + Returns 1 in all other cases. */ +bool replay_checkpoint(ReplayCheckpoint checkpoint); + +/* Asynchronous events queue */ + +/*! Disables storing events in the queue */ +void replay_disable_events(void); +/*! Returns true when saving events is enabled */ +bool replay_events_enabled(void); +/*! Adds bottom half event to the queue */ +void replay_bh_schedule_event(QEMUBH *bh); +/*! Adds input event to the queue */ +void replay_input_event(QemuConsole *src, InputEvent *evt); +/*! Adds input sync event to the queue */ +void replay_input_sync_event(void); + +#endif diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index c439975139..3bb8897727 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -70,6 +70,7 @@ void qemu_system_killed(int signal, pid_t pid); void qemu_devices_reset(void); void qemu_system_reset(bool report); void qemu_system_guest_panicked(void); +size_t qemu_target_page_bits(void); void qemu_add_exit_notifier(Notifier *notify); void qemu_remove_exit_notifier(Notifier *notify); @@ -83,14 +84,52 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict); void qemu_announce_self(void); +/* Subcommands for QEMU_VM_COMMAND */ +enum qemu_vm_cmd { + MIG_CMD_INVALID = 0, /* Must be 0 */ + MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */ + MIG_CMD_PING, /* Request a PONG on the RP */ + + MIG_CMD_POSTCOPY_ADVISE, /* Prior to any page transfers, just + warn we might want to do PC */ + MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming + pages as it's running. */ + MIG_CMD_POSTCOPY_RUN, /* Start execution */ + + MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that + were previously sent during + precopy but are dirty. */ + MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */ + MIG_CMD_MAX +}; + +#define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24) + bool qemu_savevm_state_blocked(Error **errp); void qemu_savevm_state_begin(QEMUFile *f, const MigrationParams *params); void qemu_savevm_state_header(QEMUFile *f); -int qemu_savevm_state_iterate(QEMUFile *f); -void qemu_savevm_state_complete(QEMUFile *f); -void qemu_savevm_state_cancel(void); -uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size); +int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy); +void qemu_savevm_state_cleanup(void); +void qemu_savevm_state_complete_postcopy(QEMUFile *f); +void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only); +void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, + uint64_t *res_non_postcopiable, + uint64_t *res_postcopiable); +void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command, + uint16_t len, uint8_t *data); +void qemu_savevm_send_ping(QEMUFile *f, uint32_t value); +void qemu_savevm_send_open_return_path(QEMUFile *f); +int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb); +void qemu_savevm_send_postcopy_advise(QEMUFile *f); +void qemu_savevm_send_postcopy_listen(QEMUFile *f); +void qemu_savevm_send_postcopy_run(QEMUFile *f); + +void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, + uint16_t len, + uint64_t *start_list, + uint64_t *length_list); + int qemu_loadvm_state(QEMUFile *f); typedef enum DisplayType @@ -133,6 +172,7 @@ extern int boot_menu; extern bool boot_strict; extern uint8_t *boot_splash_filedata; extern size_t boot_splash_filedata_size; +extern bool enable_mlock; extern uint8_t qemu_extra_params_fw[2]; extern QEMUClockType rtc_clock; extern const char *mem_path; diff --git a/include/ui/console.h b/include/ui/console.h index d887f911f3..c249db4f7c 100644 --- a/include/ui/console.h +++ b/include/ui/console.h @@ -321,13 +321,23 @@ static inline pixman_format_code_t surface_format(DisplaySurface *s) #ifdef CONFIG_CURSES #include <curses.h> typedef chtype console_ch_t; +extern chtype vga_to_curses[]; #else typedef unsigned long console_ch_t; #endif static inline void console_write_ch(console_ch_t *dest, uint32_t ch) { - if (!(ch & 0xff)) + uint8_t c = ch; +#ifdef CONFIG_CURSES + if (vga_to_curses[c]) { + ch &= ~(console_ch_t)0xff; + ch |= vga_to_curses[c]; + } +#else + if (c == '\0') { ch |= ' '; + } +#endif *dest = ch; } diff --git a/include/ui/input.h b/include/ui/input.h index 5d5ac00663..d06a12dd4c 100644 --- a/include/ui/input.h +++ b/include/ui/input.h @@ -33,7 +33,9 @@ void qemu_input_handler_bind(QemuInputHandlerState *s, const char *device_id, int head, Error **errp); void qemu_input_event_send(QemuConsole *src, InputEvent *evt); +void qemu_input_event_send_impl(QemuConsole *src, InputEvent *evt); void qemu_input_event_sync(void); +void qemu_input_event_sync_impl(void); InputEvent *qemu_input_event_new_key(KeyValue *key, bool down); void qemu_input_event_send_key(QemuConsole *src, KeyValue *key, bool down); @@ -30,14 +30,6 @@ #include "exec/memory.h" #include "exec/address-spaces.h" -//#define DEBUG_IOPORT - -#ifdef DEBUG_IOPORT -# define LOG_IOPORT(...) qemu_log_mask(CPU_LOG_IOPORT, ## __VA_ARGS__) -#else -# define LOG_IOPORT(...) do { } while (0) -#endif - typedef struct MemoryRegionPortioList { MemoryRegion mr; void *portio_opaque; @@ -62,8 +54,7 @@ const MemoryRegionOps unassigned_io_ops = { void cpu_outb(pio_addr_t addr, uint8_t val) { - LOG_IOPORT("outb: %04"FMT_pioaddr" %02"PRIx8"\n", addr, val); - trace_cpu_out(addr, val); + trace_cpu_out(addr, 'b', val); address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, &val, 1); } @@ -72,8 +63,7 @@ void cpu_outw(pio_addr_t addr, uint16_t val) { uint8_t buf[2]; - LOG_IOPORT("outw: %04"FMT_pioaddr" %04"PRIx16"\n", addr, val); - trace_cpu_out(addr, val); + trace_cpu_out(addr, 'w', val); stw_p(buf, val); address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 2); @@ -83,8 +73,7 @@ void cpu_outl(pio_addr_t addr, uint32_t val) { uint8_t buf[4]; - LOG_IOPORT("outl: %04"FMT_pioaddr" %08"PRIx32"\n", addr, val); - trace_cpu_out(addr, val); + trace_cpu_out(addr, 'l', val); stl_p(buf, val); address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 4); @@ -96,8 +85,7 @@ uint8_t cpu_inb(pio_addr_t addr) address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, &val, 1); - trace_cpu_in(addr, val); - LOG_IOPORT("inb : %04"FMT_pioaddr" %02"PRIx8"\n", addr, val); + trace_cpu_in(addr, 'b', val); return val; } @@ -108,8 +96,7 @@ uint16_t cpu_inw(pio_addr_t addr) address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 2); val = lduw_p(buf); - trace_cpu_in(addr, val); - LOG_IOPORT("inw : %04"FMT_pioaddr" %04"PRIx16"\n", addr, val); + trace_cpu_in(addr, 'w', val); return val; } @@ -120,8 +107,7 @@ uint32_t cpu_inl(pio_addr_t addr) address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 4); val = ldl_p(buf); - trace_cpu_in(addr, val); - LOG_IOPORT("inl : %04"FMT_pioaddr" %08"PRIx32"\n", addr, val); + trace_cpu_in(addr, 'l', val); return val; } diff --git a/iothread.c b/iothread.c index da6ce7b308..1b8c2bbecb 100644 --- a/iothread.c +++ b/iothread.c @@ -72,6 +72,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) { Error *local_error = NULL; IOThread *iothread = IOTHREAD(obj); + char *name, *thread_name; iothread->stopping = false; iothread->thread_id = -1; @@ -87,8 +88,12 @@ static void iothread_complete(UserCreatable *obj, Error **errp) /* This assumes we are called from a thread with useful CPU affinity for us * to inherit. */ - qemu_thread_create(&iothread->thread, "iothread", iothread_run, + name = object_get_canonical_path_component(OBJECT(obj)); + thread_name = g_strdup_printf("IO %s", name); + qemu_thread_create(&iothread->thread, thread_name, iothread_run, iothread, QEMU_THREAD_JOINABLE); + g_free(thread_name); + g_free(name); /* Wait for initialization to complete */ qemu_mutex_lock(&iothread->init_done_lock); @@ -109,6 +109,7 @@ bool kvm_allowed; bool kvm_readonly_mem_allowed; bool kvm_vm_attributes_allowed; bool kvm_direct_msi_allowed; +bool kvm_ioeventfd_any_length_allowed; static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_INFO(USER_MEMORY), @@ -1461,7 +1462,6 @@ static int kvm_init(MachineState *ms) * page size for the system though. */ assert(TARGET_PAGE_SIZE <= getpagesize()); - page_size_init(); s->sigmask_len = 8; @@ -1612,6 +1612,9 @@ static int kvm_init(MachineState *ms) kvm_vm_attributes_allowed = (kvm_check_extension(s, KVM_CAP_VM_ATTRIBUTES) > 0); + kvm_ioeventfd_any_length_allowed = + (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0); + ret = kvm_arch_init(ms, s); if (ret < 0) { goto err; @@ -1766,11 +1769,6 @@ void kvm_cpu_synchronize_post_init(CPUState *cpu) run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, cpu); } -void kvm_cpu_clean_state(CPUState *cpu) -{ - cpu->kvm_vcpu_dirty = false; -} - int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; diff --git a/kvm-stub.c b/kvm-stub.c index a5051f7c6e..dc97a5edf1 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -30,6 +30,7 @@ bool kvm_gsi_routing_allowed; bool kvm_gsi_direct_mapping; bool kvm_allowed; bool kvm_readonly_mem_allowed; +bool kvm_ioeventfd_any_length_allowed; int kvm_init_vcpu(CPUState *cpu) { diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h new file mode 100644 index 0000000000..9057d7af3a --- /dev/null +++ b/linux-headers/linux/userfaultfd.h @@ -0,0 +1,167 @@ +/* + * include/linux/userfaultfd.h + * + * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> + * Copyright (C) 2015 Red Hat, Inc. + * + */ + +#ifndef _LINUX_USERFAULTFD_H +#define _LINUX_USERFAULTFD_H + +#include <linux/types.h> + +#define UFFD_API ((__u64)0xAA) +/* + * After implementing the respective features it will become: + * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ + * UFFD_FEATURE_EVENT_FORK) + */ +#define UFFD_API_FEATURES (0) +#define UFFD_API_IOCTLS \ + ((__u64)1 << _UFFDIO_REGISTER | \ + (__u64)1 << _UFFDIO_UNREGISTER | \ + (__u64)1 << _UFFDIO_API) +#define UFFD_API_RANGE_IOCTLS \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_ZEROPAGE) + +/* + * Valid ioctl command number range with this API is from 0x00 to + * 0x3F. UFFDIO_API is the fixed number, everything else can be + * changed by implementing a different UFFD_API. If sticking to the + * same UFFD_API more ioctl can be added and userland will be aware of + * which ioctl the running kernel implements through the ioctl command + * bitmask written by the UFFDIO_API. + */ +#define _UFFDIO_REGISTER (0x00) +#define _UFFDIO_UNREGISTER (0x01) +#define _UFFDIO_WAKE (0x02) +#define _UFFDIO_COPY (0x03) +#define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_API (0x3F) + +/* userfaultfd ioctl ids */ +#define UFFDIO 0xAA +#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \ + struct uffdio_api) +#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \ + struct uffdio_register) +#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \ + struct uffdio_range) +#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ + struct uffdio_range) +#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \ + struct uffdio_copy) +#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ + struct uffdio_zeropage) + +/* read() structure */ +struct uffd_msg { + __u8 event; + + __u8 reserved1; + __u16 reserved2; + __u32 reserved3; + + union { + struct { + __u64 flags; + __u64 address; + } pagefault; + + struct { + /* unused reserved fields */ + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; + } reserved; + } arg; +} __packed; + +/* + * Start at 0x12 and not at 0 to be more strict against bugs. + */ +#define UFFD_EVENT_PAGEFAULT 0x12 +#if 0 /* not available yet */ +#define UFFD_EVENT_FORK 0x13 +#endif + +/* flags for UFFD_EVENT_PAGEFAULT */ +#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ +#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ + +struct uffdio_api { + /* userland asks for an API number and the features to enable */ + __u64 api; + /* + * Kernel answers below with the all available features for + * the API, this notifies userland of which events and/or + * which flags for each event are enabled in the current + * kernel. + * + * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE + * are to be considered implicitly always enabled in all kernels as + * long as the uffdio_api.api requested matches UFFD_API. + */ +#if 0 /* not available yet */ +#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) +#define UFFD_FEATURE_EVENT_FORK (1<<1) +#endif + __u64 features; + + __u64 ioctls; +}; + +struct uffdio_range { + __u64 start; + __u64 len; +}; + +struct uffdio_register { + struct uffdio_range range; +#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) +#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) + __u64 mode; + + /* + * kernel answers which ioctl commands are available for the + * range, keep at the end as the last 8 bytes aren't read. + */ + __u64 ioctls; +}; + +struct uffdio_copy { + __u64 dst; + __u64 src; + __u64 len; + /* + * There will be a wrprotection flag later that allows to map + * pages wrprotected on the fly. And such a flag will be + * available if the wrprotection ioctl are implemented for the + * range according to the uffdio_register.ioctls. + */ +#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * "copy" is written by the ioctl and must be at the end: the + * copy_from_user will not read the last 8 bytes. + */ + __s64 copy; +}; + +struct uffdio_zeropage { + struct uffdio_range range; +#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * "zeropage" is written by the ioctl and must be at the end: + * the copy_from_user will not read the last 8 bytes. + */ + __s64 zeropage; +}; + +#endif /* _LINUX_USERFAULTFD_H */ diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index 14a01602f8..ead86db91a 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -78,7 +78,7 @@ struct vhost_memory { #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) /* Give up ownership, and reset the device to default values. * Allows subsequent call to VHOST_OWNER_SET to succeed. */ -#define VHOST_RESET_DEVICE _IO(VHOST_VIRTIO, 0x02) +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) /* Set up/modify memory layout */ #define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index d68f5a16ca..8b17c0e94b 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1478,8 +1478,7 @@ static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot) host_start = (uintptr_t) g2h(elf_bss); host_end = (uintptr_t) g2h(last_bss); - host_map_start = (host_start + qemu_real_host_page_size - 1); - host_map_start &= -qemu_real_host_page_size; + host_map_start = REAL_HOST_PAGE_ALIGN(host_start); if (host_map_start < host_end) { void *p = mmap((void *)host_map_start, host_end - host_map_start, diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 5606bcd164..7b459d5100 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -444,9 +444,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, /* If so, truncate the file map at eof aligned with the hosts real pagesize. Additional anonymous maps will be created beyond EOF. */ - len = (sb.st_size - offset); - len += qemu_real_host_page_size - 1; - len &= ~(qemu_real_host_page_size - 1); + len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset); } } diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 8bfb24f05b..6c64ba63db 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -5325,8 +5325,7 @@ static abi_long do_open_by_handle_at(abi_long mount_fd, abi_long handle, return -TARGET_EFAULT; } - fh = g_malloc0(total_size); - memcpy(fh, target_fh, total_size); + fh = g_memdup(target_fh, total_size); fh->handle_bytes = size; fh->handle_type = tswap32(target_fh->handle_type); diff --git a/main-loop.c b/main-loop.c index db600a37f1..5877615387 100644 --- a/main-loop.c +++ b/main-loop.c @@ -230,7 +230,7 @@ static int os_host_main_loop_wait(int64_t timeout) if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) { static bool notified; - if (!notified && !qtest_enabled()) { + if (!notified && !qtest_driver()) { fprintf(stderr, "main-loop: WARNING: I/O thread spun for %d iterations\n", MAX_MAIN_LOOP_SPIN); @@ -506,6 +506,9 @@ int main_loop_wait(int nonblocking) slirp_pollfds_poll(gpollfds, (ret < 0)); #endif + /* CPU thread can infinitely wait for event after + missing the warp */ + qemu_clock_warp(QEMU_CLOCK_VIRTUAL); qemu_clock_run_all_timers(); return ret; @@ -1688,7 +1688,9 @@ void memory_region_add_eventfd(MemoryRegion *mr, }; unsigned i; - adjust_endianness(mr, &mrfd.data, size); + if (size) { + adjust_endianness(mr, &mrfd.data, size); + } memory_region_transaction_begin(); for (i = 0; i < mr->ioeventfd_nb; ++i) { if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) { @@ -1721,7 +1723,9 @@ void memory_region_del_eventfd(MemoryRegion *mr, }; unsigned i; - adjust_endianness(mr, &mrfd.data, size); + if (size) { + adjust_endianness(mr, &mrfd.data, size); + } memory_region_transaction_begin(); for (i = 0; i < mr->ioeventfd_nb; ++i) { if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) { @@ -2036,6 +2040,9 @@ static void listener_add_address_space(MemoryListener *listener, return; } + if (listener->begin) { + listener->begin(listener); + } if (global_dirty_log) { if (listener->log_global_start) { listener->log_global_start(listener); @@ -2052,10 +2059,16 @@ static void listener_add_address_space(MemoryListener *listener, .offset_within_address_space = int128_get64(fr->addr.start), .readonly = fr->readonly, }; + if (fr->dirty_log_mask && listener->log_start) { + listener->log_start(listener, §ion, 0, fr->dirty_log_mask); + } if (listener->region_add) { listener->region_add(listener, §ion); } } + if (listener->commit) { + listener->commit(listener); + } flatview_unref(view); } diff --git a/migration/Makefile.objs b/migration/Makefile.objs index d929e969ae..0cac6d707a 100644 --- a/migration/Makefile.objs +++ b/migration/Makefile.objs @@ -1,7 +1,7 @@ common-obj-y += migration.o tcp.o common-obj-y += vmstate.o common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o -common-obj-y += xbzrle.o +common-obj-y += xbzrle.o postcopy-ram.o common-obj-$(CONFIG_RDMA) += rdma.o common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o diff --git a/migration/block.c b/migration/block.c index f7bb1e0126..656f38f341 100644 --- a/migration/block.c +++ b/migration/block.c @@ -36,6 +36,8 @@ #define MAX_IS_ALLOCATED_SEARCH 65536 +#define MAX_INFLIGHT_IO 512 + //#define DEBUG_BLK_MIGRATION #ifdef DEBUG_BLK_MIGRATION @@ -591,7 +593,7 @@ static int64_t get_remaining_dirty(void) /* Called with iothread lock taken. */ -static void blk_mig_cleanup(void) +static void block_migration_cleanup(void *opaque) { BlkMigDevState *bmds; BlkMigBlock *blk; @@ -618,11 +620,6 @@ static void blk_mig_cleanup(void) blk_mig_unlock(); } -static void block_migration_cancel(void *opaque) -{ - blk_mig_cleanup(); -} - static int block_save_setup(QEMUFile *f, void *opaque) { int ret; @@ -670,7 +667,10 @@ static int block_save_iterate(QEMUFile *f, void *opaque) blk_mig_lock(); while ((block_mig_state.submitted + block_mig_state.read_done) * BLOCK_SIZE < - qemu_file_get_rate_limit(f)) { + qemu_file_get_rate_limit(f) && + (block_mig_state.submitted + + block_mig_state.read_done) < + MAX_INFLIGHT_IO) { blk_mig_unlock(); if (block_mig_state.bulk_completed == 0) { /* first finish the bulk phase */ @@ -750,11 +750,12 @@ static int block_save_complete(QEMUFile *f, void *opaque) qemu_put_be64(f, BLK_MIG_FLAG_EOS); - blk_mig_cleanup(); return 0; } -static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, + uint64_t *non_postcopiable_pending, + uint64_t *postcopiable_pending) { /* Estimate pending number of bytes to send */ uint64_t pending; @@ -773,7 +774,8 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) qemu_mutex_unlock_iothread(); DPRINTF("Enter save live pending %" PRIu64 "\n", pending); - return pending; + /* We don't do postcopy */ + *non_postcopiable_pending += pending; } static int block_load(QEMUFile *f, void *opaque, int version_id) @@ -882,10 +884,10 @@ static SaveVMHandlers savevm_block_handlers = { .set_params = block_set_params, .save_live_setup = block_save_setup, .save_live_iterate = block_save_iterate, - .save_live_complete = block_save_complete, + .save_live_complete_precopy = block_save_complete, .save_live_pending = block_save_pending, .load_state = block_load, - .cancel = block_migration_cancel, + .cleanup = block_migration_cleanup, .is_active = block_is_active, }; diff --git a/migration/migration.c b/migration/migration.c index b092f386b4..adc6b6f1c9 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -21,15 +21,18 @@ #include "sysemu/sysemu.h" #include "block/block.h" #include "qapi/qmp/qerror.h" +#include "qapi/util.h" #include "qemu/sockets.h" #include "qemu/rcu.h" #include "migration/block.h" +#include "migration/postcopy-ram.h" #include "qemu/thread.h" #include "qmp-commands.h" #include "trace.h" -#include "qapi/util.h" #include "qapi-event.h" #include "qom/cpu.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ @@ -57,6 +60,13 @@ static NotifierList migration_state_notifiers = static bool deferred_incoming; +/* + * Current state of incoming postcopy; note this is not part of + * MigrationIncomingState since it's state is used during cleanup + * at the end as MIS is being freed. + */ +static PostcopyState incoming_postcopy_state; + /* When we add fault tolerance, we could have several migrations at once. For now we don't need to add dynamic creation of migration */ @@ -64,6 +74,7 @@ static bool deferred_incoming; /* For outgoing */ MigrationState *migrate_get_current(void) { + static bool once; static MigrationState current_migration = { .state = MIGRATION_STATUS_NONE, .bandwidth_limit = MAX_THROTTLE, @@ -81,6 +92,10 @@ MigrationState *migrate_get_current(void) DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT, }; + if (!once) { + qemu_mutex_init(¤t_migration.src_page_req_mutex); + once = true; + } return ¤t_migration; } @@ -95,14 +110,17 @@ MigrationIncomingState *migration_incoming_get_current(void) MigrationIncomingState *migration_incoming_state_new(QEMUFile* f) { mis_current = g_new0(MigrationIncomingState, 1); - mis_current->file = f; + mis_current->from_src_file = f; QLIST_INIT(&mis_current->loadvm_handlers); + qemu_mutex_init(&mis_current->rp_mutex); + qemu_event_init(&mis_current->main_thread_load_event, false); return mis_current; } void migration_incoming_state_destroy(void) { + qemu_event_destroy(&mis_current->main_thread_load_event); loadvm_free_handlers(mis_current); g_free(mis_current); mis_current = NULL; @@ -248,6 +266,35 @@ static void deferred_incoming_migration(Error **errp) deferred_incoming = true; } +/* Request a range of pages from the source VM at the given + * start address. + * rbname: Name of the RAMBlock to request the page in, if NULL it's the same + * as the last request (a name must have been given previously) + * Start: Address offset within the RB + * Len: Length in bytes required - must be a multiple of pagesize + */ +void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname, + ram_addr_t start, size_t len) +{ + uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname upto 256 */ + size_t msglen = 12; /* start + len */ + + *(uint64_t *)bufc = cpu_to_be64((uint64_t)start); + *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len); + + if (rbname) { + int rbname_len = strlen(rbname); + assert(rbname_len < 256); + + bufc[msglen++] = rbname_len; + memcpy(bufc + msglen, rbname, rbname_len); + msglen += rbname_len; + migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc); + } else { + migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc); + } +} + void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p; @@ -278,12 +325,37 @@ static void process_incoming_migration_co(void *opaque) { QEMUFile *f = opaque; Error *local_err = NULL; + MigrationIncomingState *mis; + PostcopyState ps; int ret; - migration_incoming_state_new(f); + mis = migration_incoming_state_new(f); + postcopy_state_set(POSTCOPY_INCOMING_NONE); migrate_generate_event(MIGRATION_STATUS_ACTIVE); + ret = qemu_loadvm_state(f); + ps = postcopy_state_get(); + trace_process_incoming_migration_co_end(ret, ps); + if (ps != POSTCOPY_INCOMING_NONE) { + if (ps == POSTCOPY_INCOMING_ADVISE) { + /* + * Where a migration had postcopy enabled (and thus went to advise) + * but managed to complete within the precopy period, we can use + * the normal exit. + */ + postcopy_ram_incoming_cleanup(mis); + } else if (ret >= 0) { + /* + * Postcopy was started, cleanup should happen at the end of the + * postcopy thread. + */ + trace_process_incoming_migration_co_postcopy_end_main(); + return; + } + /* Else if something went wrong then just fall out of the normal exit */ + } + qemu_fclose(f); free_xbzrle_decoded_buf(); migration_incoming_state_destroy(); @@ -344,6 +416,50 @@ void process_incoming_migration(QEMUFile *f) qemu_coroutine_enter(co, f); } +/* + * Send a message on the return channel back to the source + * of the migration. + */ +void migrate_send_rp_message(MigrationIncomingState *mis, + enum mig_rp_message_type message_type, + uint16_t len, void *data) +{ + trace_migrate_send_rp_message((int)message_type, len); + qemu_mutex_lock(&mis->rp_mutex); + qemu_put_be16(mis->to_src_file, (unsigned int)message_type); + qemu_put_be16(mis->to_src_file, len); + qemu_put_buffer(mis->to_src_file, data, len); + qemu_fflush(mis->to_src_file); + qemu_mutex_unlock(&mis->rp_mutex); +} + +/* + * Send a 'SHUT' message on the return channel with the given value + * to indicate that we've finished with the RP. Non-0 value indicates + * error. + */ +void migrate_send_rp_shut(MigrationIncomingState *mis, + uint32_t value) +{ + uint32_t buf; + + buf = cpu_to_be32(value); + migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf); +} + +/* + * Send a 'PONG' message on the return channel with the given value + * (normally in response to a 'PING') + */ +void migrate_send_rp_pong(MigrationIncomingState *mis, + uint32_t value) +{ + uint32_t buf; + + buf = cpu_to_be32(value); + migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf); +} + /* amount of nanoseconds we are willing to wait for migration to be down. * the choice of nanoseconds is because it is the maximum resolution that * get_clock() can achieve. It is an internal measure. All user-visible @@ -399,6 +515,24 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) return params; } +/* + * Return true if we're already in the middle of a migration + * (i.e. any of the active or setup states) + */ +static bool migration_is_setup_or_active(int state) +{ + switch (state) { + case MIGRATION_STATUS_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_SETUP: + return true; + + default: + return false; + + } +} + static void get_xbzrle_cache_stats(MigrationInfo *info) { if (migrate_use_xbzrle()) { @@ -465,6 +599,39 @@ MigrationInfo *qmp_query_migrate(Error **errp) get_xbzrle_cache_stats(info); break; + case MIGRATION_STATUS_POSTCOPY_ACTIVE: + /* Mostly the same as active; TODO add some postcopy stats */ + info->has_status = true; + info->has_total_time = true; + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + - s->total_time; + info->has_expected_downtime = true; + info->expected_downtime = s->expected_downtime; + info->has_setup_time = true; + info->setup_time = s->setup_time; + + info->has_ram = true; + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = ram_bytes_transferred(); + info->ram->remaining = ram_bytes_remaining(); + info->ram->total = ram_bytes_total(); + info->ram->duplicate = dup_mig_pages_transferred(); + info->ram->skipped = skipped_mig_pages_transferred(); + info->ram->normal = norm_mig_pages_transferred(); + info->ram->normal_bytes = norm_mig_bytes_transferred(); + info->ram->dirty_pages_rate = s->dirty_pages_rate; + info->ram->mbps = s->mbps; + + if (blk_mig_active()) { + info->has_disk = true; + info->disk = g_malloc0(sizeof(*info->disk)); + info->disk->transferred = blk_mig_bytes_transferred(); + info->disk->remaining = blk_mig_bytes_remaining(); + info->disk->total = blk_mig_bytes_total(); + } + + get_xbzrle_cache_stats(info); + break; case MIGRATION_STATUS_COMPLETED: get_xbzrle_cache_stats(info); @@ -506,8 +673,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, MigrationState *s = migrate_get_current(); MigrationCapabilityStatusList *cap; - if (s->state == MIGRATION_STATUS_ACTIVE || - s->state == MIGRATION_STATUS_SETUP) { + if (migration_is_setup_or_active(s->state)) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; } @@ -515,6 +681,20 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, for (cap = params; cap; cap = cap->next) { s->enabled_capabilities[cap->value->capability] = cap->value->state; } + + if (migrate_postcopy_ram()) { + if (migrate_use_compression()) { + /* The decompression threads asynchronously write into RAM + * rather than use the atomic copies needed to avoid + * userfaulting. It should be possible to fix the decompression + * threads for compatibility in future. + */ + error_report("Postcopy is not currently compatible with " + "compression"); + s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] = + false; + } + } } void qmp_migrate_set_parameters(bool has_compress_level, @@ -583,6 +763,28 @@ void qmp_migrate_set_parameters(bool has_compress_level, } } +void qmp_migrate_start_postcopy(Error **errp) +{ + MigrationState *s = migrate_get_current(); + + if (!migrate_postcopy_ram()) { + error_setg(errp, "Enable postcopy with migrate_set_capability before" + " the start of migration"); + return; + } + + if (s->state == MIGRATION_STATUS_NONE) { + error_setg(errp, "Postcopy must be started after migration has been" + " started"); + return; + } + /* + * we don't error if migration has finished since that would be racy + * with issuing this command. + */ + atomic_set(&s->start_postcopy, true); +} + /* shared migration helpers */ static void migrate_set_state(MigrationState *s, int old_state, int new_state) @@ -600,10 +802,15 @@ static void migrate_fd_cleanup(void *opaque) qemu_bh_delete(s->cleanup_bh); s->cleanup_bh = NULL; + flush_page_queue(s); + if (s->file) { trace_migrate_fd_cleanup(); qemu_mutex_unlock_iothread(); - qemu_thread_join(&s->thread); + if (s->migration_thread_running) { + qemu_thread_join(&s->thread); + s->migration_thread_running = false; + } qemu_mutex_lock_iothread(); migrate_compress_threads_join(); @@ -611,14 +818,12 @@ static void migrate_fd_cleanup(void *opaque) s->file = NULL; } - assert(s->state != MIGRATION_STATUS_ACTIVE); + assert((s->state != MIGRATION_STATUS_ACTIVE) && + (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE)); - if (s->state != MIGRATION_STATUS_COMPLETED) { - qemu_savevm_state_cancel(); - if (s->state == MIGRATION_STATUS_CANCELLING) { - migrate_set_state(s, MIGRATION_STATUS_CANCELLING, - MIGRATION_STATUS_CANCELLED); - } + if (s->state == MIGRATION_STATUS_CANCELLING) { + migrate_set_state(s, MIGRATION_STATUS_CANCELLING, + MIGRATION_STATUS_CANCELLED); } notifier_list_notify(&migration_state_notifiers, s); @@ -638,10 +843,14 @@ static void migrate_fd_cancel(MigrationState *s) QEMUFile *f = migrate_get_current()->file; trace_migrate_fd_cancel(); + if (s->rp_state.from_dst_file) { + /* shutdown the rp socket, so causing the rp thread to shutdown */ + qemu_file_shutdown(s->rp_state.from_dst_file); + } + do { old_state = s->state; - if (old_state != MIGRATION_STATUS_SETUP && - old_state != MIGRATION_STATUS_ACTIVE) { + if (!migration_is_setup_or_active(old_state)) { break; } migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING); @@ -685,43 +894,43 @@ bool migration_has_failed(MigrationState *s) s->state == MIGRATION_STATUS_FAILED); } -static MigrationState *migrate_init(const MigrationParams *params) +bool migration_in_postcopy(MigrationState *s) { - MigrationState *s = migrate_get_current(); - int64_t bandwidth_limit = s->bandwidth_limit; - bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; - int64_t xbzrle_cache_size = s->xbzrle_cache_size; - int compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL]; - int compress_thread_count = - s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; - int decompress_thread_count = - s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; - int x_cpu_throttle_initial = - s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; - int x_cpu_throttle_increment = - s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; + return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); +} - memcpy(enabled_capabilities, s->enabled_capabilities, - sizeof(enabled_capabilities)); +MigrationState *migrate_init(const MigrationParams *params) +{ + MigrationState *s = migrate_get_current(); - memset(s, 0, sizeof(*s)); + /* + * Reinitialise all migration state, except + * parameters/capabilities that the user set, and + * locks. + */ + s->bytes_xfer = 0; + s->xfer_limit = 0; + s->cleanup_bh = 0; + s->file = NULL; + s->state = MIGRATION_STATUS_NONE; s->params = *params; - memcpy(s->enabled_capabilities, enabled_capabilities, - sizeof(enabled_capabilities)); - s->xbzrle_cache_size = xbzrle_cache_size; - - s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level; - s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = - compress_thread_count; - s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = - decompress_thread_count; - s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = - x_cpu_throttle_initial; - s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = - x_cpu_throttle_increment; - s->bandwidth_limit = bandwidth_limit; + s->rp_state.from_dst_file = NULL; + s->rp_state.error = false; + s->mbps = 0.0; + s->downtime = 0; + s->expected_downtime = 0; + s->dirty_pages_rate = 0; + s->dirty_bytes_rate = 0; + s->setup_time = 0; + s->dirty_sync_count = 0; + s->start_postcopy = false; + s->migration_thread_running = false; + s->last_req_rb = NULL; + migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); + QSIMPLEQ_INIT(&s->src_page_requests); + s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); return s; } @@ -773,8 +982,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, params.blk = has_blk && blk; params.shared = has_inc && inc; - if (s->state == MIGRATION_STATUS_ACTIVE || - s->state == MIGRATION_STATUS_SETUP || + if (migration_is_setup_or_active(s->state) || s->state == MIGRATION_STATUS_CANCELLING) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; @@ -893,6 +1101,15 @@ void qmp_migrate_set_downtime(double value, Error **errp) max_downtime = (uint64_t)value; } +bool migrate_postcopy_ram(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM]; +} + bool migrate_auto_converge(void) { MigrationState *s; @@ -974,36 +1191,382 @@ int64_t migrate_xbzrle_cache_size(void) return s->xbzrle_cache_size; } +/* migration thread support */ +/* + * Something bad happened to the RP stream, mark an error + * The caller shall print or trace something to indicate why + */ +static void mark_source_rp_bad(MigrationState *s) +{ + s->rp_state.error = true; +} + +static struct rp_cmd_args { + ssize_t len; /* -1 = variable */ + const char *name; +} rp_cmd_args[] = { + [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, + [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, + [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, + [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, + [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, + [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, +}; + +/* + * Process a request for pages received on the return path, + * We're allowed to send more than requested (e.g. to round to our page size) + * and we don't need to send pages that have already been sent. + */ +static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, + ram_addr_t start, size_t len) +{ + long our_host_ps = getpagesize(); + + trace_migrate_handle_rp_req_pages(rbname, start, len); + + /* + * Since we currently insist on matching page sizes, just sanity check + * we're being asked for whole host pages. + */ + if (start & (our_host_ps-1) || + (len & (our_host_ps-1))) { + error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT + " len: %zd", __func__, start, len); + mark_source_rp_bad(ms); + return; + } + + if (ram_save_queue_pages(ms, rbname, start, len)) { + mark_source_rp_bad(ms); + } +} + +/* + * Handles messages sent on the return path towards the source VM + * + */ +static void *source_return_path_thread(void *opaque) +{ + MigrationState *ms = opaque; + QEMUFile *rp = ms->rp_state.from_dst_file; + uint16_t header_len, header_type; + const int max_len = 512; + uint8_t buf[max_len]; + uint32_t tmp32, sibling_error; + ram_addr_t start = 0; /* =0 to silence warning */ + size_t len = 0, expected_len; + int res; + + trace_source_return_path_thread_entry(); + while (!ms->rp_state.error && !qemu_file_get_error(rp) && + migration_is_setup_or_active(ms->state)) { + trace_source_return_path_thread_loop_top(); + header_type = qemu_get_be16(rp); + header_len = qemu_get_be16(rp); + + if (header_type >= MIG_RP_MSG_MAX || + header_type == MIG_RP_MSG_INVALID) { + error_report("RP: Received invalid message 0x%04x length 0x%04x", + header_type, header_len); + mark_source_rp_bad(ms); + goto out; + } + + if ((rp_cmd_args[header_type].len != -1 && + header_len != rp_cmd_args[header_type].len) || + header_len > max_len) { + error_report("RP: Received '%s' message (0x%04x) with" + "incorrect length %d expecting %zu", + rp_cmd_args[header_type].name, header_type, header_len, + (size_t)rp_cmd_args[header_type].len); + mark_source_rp_bad(ms); + goto out; + } + + /* We know we've got a valid header by this point */ + res = qemu_get_buffer(rp, buf, header_len); + if (res != header_len) { + error_report("RP: Failed reading data for message 0x%04x" + " read %d expected %d", + header_type, res, header_len); + mark_source_rp_bad(ms); + goto out; + } + + /* OK, we have the message and the data */ + switch (header_type) { + case MIG_RP_MSG_SHUT: + sibling_error = be32_to_cpup((uint32_t *)buf); + trace_source_return_path_thread_shut(sibling_error); + if (sibling_error) { + error_report("RP: Sibling indicated error %d", sibling_error); + mark_source_rp_bad(ms); + } + /* + * We'll let the main thread deal with closing the RP + * we could do a shutdown(2) on it, but we're the only user + * anyway, so there's nothing gained. + */ + goto out; + + case MIG_RP_MSG_PONG: + tmp32 = be32_to_cpup((uint32_t *)buf); + trace_source_return_path_thread_pong(tmp32); + break; + + case MIG_RP_MSG_REQ_PAGES: + start = be64_to_cpup((uint64_t *)buf); + len = be32_to_cpup((uint32_t *)(buf + 8)); + migrate_handle_rp_req_pages(ms, NULL, start, len); + break; + + case MIG_RP_MSG_REQ_PAGES_ID: + expected_len = 12 + 1; /* header + termination */ + + if (header_len >= expected_len) { + start = be64_to_cpup((uint64_t *)buf); + len = be32_to_cpup((uint32_t *)(buf + 8)); + /* Now we expect an idstr */ + tmp32 = buf[12]; /* Length of the following idstr */ + buf[13 + tmp32] = '\0'; + expected_len += tmp32; + } + if (header_len != expected_len) { + error_report("RP: Req_Page_id with length %d expecting %zd", + header_len, expected_len); + mark_source_rp_bad(ms); + goto out; + } + migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); + break; + + default: + break; + } + } + if (qemu_file_get_error(rp)) { + trace_source_return_path_thread_bad_end(); + mark_source_rp_bad(ms); + } + + trace_source_return_path_thread_end(); +out: + ms->rp_state.from_dst_file = NULL; + qemu_fclose(rp); + return NULL; +} + +static int open_return_path_on_source(MigrationState *ms) +{ + + ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->file); + if (!ms->rp_state.from_dst_file) { + return -1; + } + + trace_open_return_path_on_source(); + qemu_thread_create(&ms->rp_state.rp_thread, "return path", + source_return_path_thread, ms, QEMU_THREAD_JOINABLE); + + trace_open_return_path_on_source_continue(); + + return 0; +} + +/* Returns 0 if the RP was ok, otherwise there was an error on the RP */ +static int await_return_path_close_on_source(MigrationState *ms) +{ + /* + * If this is a normal exit then the destination will send a SHUT and the + * rp_thread will exit, however if there's an error we need to cause + * it to exit. + */ + if (qemu_file_get_error(ms->file) && ms->rp_state.from_dst_file) { + /* + * shutdown(2), if we have it, will cause it to unblock if it's stuck + * waiting for the destination. + */ + qemu_file_shutdown(ms->rp_state.from_dst_file); + mark_source_rp_bad(ms); + } + trace_await_return_path_close_on_source_joining(); + qemu_thread_join(&ms->rp_state.rp_thread); + trace_await_return_path_close_on_source_close(); + return ms->rp_state.error; +} + +/* + * Switch from normal iteration to postcopy + * Returns non-0 on error + */ +static int postcopy_start(MigrationState *ms, bool *old_vm_running) +{ + int ret; + const QEMUSizedBuffer *qsb; + int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + migrate_set_state(ms, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_POSTCOPY_ACTIVE); + + trace_postcopy_start(); + qemu_mutex_lock_iothread(); + trace_postcopy_start_set_run(); + + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); + *old_vm_running = runstate_is_running(); + global_state_store(); + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + + if (ret < 0) { + goto fail; + } + + /* + * Cause any non-postcopiable, but iterative devices to + * send out their final data. + */ + qemu_savevm_state_complete_precopy(ms->file, true); + + /* + * in Finish migrate and with the io-lock held everything should + * be quiet, but we've potentially still got dirty pages and we + * need to tell the destination to throw any pages it's already received + * that are dirty + */ + if (ram_postcopy_send_discard_bitmap(ms)) { + error_report("postcopy send discard bitmap failed"); + goto fail; + } + + /* + * send rest of state - note things that are doing postcopy + * will notice we're in POSTCOPY_ACTIVE and not actually + * wrap their state up here + */ + qemu_file_set_rate_limit(ms->file, INT64_MAX); + /* Ping just for debugging, helps line traces up */ + qemu_savevm_send_ping(ms->file, 2); + + /* + * While loading the device state we may trigger page transfer + * requests and the fd must be free to process those, and thus + * the destination must read the whole device state off the fd before + * it starts processing it. Unfortunately the ad-hoc migration format + * doesn't allow the destination to know the size to read without fully + * parsing it through each devices load-state code (especially the open + * coded devices that use get/put). + * So we wrap the device state up in a package with a length at the start; + * to do this we use a qemu_buf to hold the whole of the device state. + */ + QEMUFile *fb = qemu_bufopen("w", NULL); + if (!fb) { + error_report("Failed to create buffered file"); + goto fail; + } + + /* + * Make sure the receiver can get incoming pages before we send the rest + * of the state + */ + qemu_savevm_send_postcopy_listen(fb); + + qemu_savevm_state_complete_precopy(fb, false); + qemu_savevm_send_ping(fb, 3); + + qemu_savevm_send_postcopy_run(fb); + + /* <><> end of stuff going into the package */ + qsb = qemu_buf_get(fb); + + /* Now send that blob */ + if (qemu_savevm_send_packaged(ms->file, qsb)) { + goto fail_closefb; + } + qemu_fclose(fb); + ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; + + qemu_mutex_unlock_iothread(); + + /* + * Although this ping is just for debug, it could potentially be + * used for getting a better measurement of downtime at the source. + */ + qemu_savevm_send_ping(ms->file, 4); + + ret = qemu_file_get_error(ms->file); + if (ret) { + error_report("postcopy_start: Migration stream errored"); + migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE, + MIGRATION_STATUS_FAILED); + } + + return ret; + +fail_closefb: + qemu_fclose(fb); +fail: + migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE, + MIGRATION_STATUS_FAILED); + qemu_mutex_unlock_iothread(); + return -1; +} + /** * migration_completion: Used by migration_thread when there's not much left. * The caller 'breaks' the loop when this returns. * * @s: Current migration state + * @current_active_state: The migration state we expect to be in * @*old_vm_running: Pointer to old_vm_running flag * @*start_time: Pointer to time to update */ -static void migration_completion(MigrationState *s, bool *old_vm_running, +static void migration_completion(MigrationState *s, int current_active_state, + bool *old_vm_running, int64_t *start_time) { int ret; - qemu_mutex_lock_iothread(); - *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); - *old_vm_running = runstate_is_running(); + if (s->state == MIGRATION_STATUS_ACTIVE) { + qemu_mutex_lock_iothread(); + *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); + *old_vm_running = runstate_is_running(); + ret = global_state_store(); + + if (!ret) { + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + if (ret >= 0) { + qemu_file_set_rate_limit(s->file, INT64_MAX); + qemu_savevm_state_complete_precopy(s->file, false); + } + } + qemu_mutex_unlock_iothread(); - ret = global_state_store(); - if (!ret) { - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - if (ret >= 0) { - qemu_file_set_rate_limit(s->file, INT64_MAX); - qemu_savevm_state_complete(s->file); + if (ret < 0) { + goto fail; } + } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + trace_migration_completion_postcopy_end(); + + qemu_savevm_state_complete_postcopy(s->file); + trace_migration_completion_postcopy_end_after_complete(); } - qemu_mutex_unlock_iothread(); - if (ret < 0) { - goto fail; + /* + * If rp was opened we must clean up the thread before + * cleaning everything else up (since if there are no failures + * it will wait for the destination to send it's status in + * a SHUT command). + * Postcopy opens rp if enabled (even if it's not avtivated) + */ + if (migrate_postcopy_ram()) { + int rp_error; + trace_migration_completion_postcopy_end_before_rp(); + rp_error = await_return_path_close_on_source(s); + trace_migration_completion_postcopy_end_after_rp(rp_error); + if (rp_error) { + goto fail; + } } if (qemu_file_get_error(s->file)) { @@ -1011,59 +1574,107 @@ static void migration_completion(MigrationState *s, bool *old_vm_running, goto fail; } - migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED); + migrate_set_state(s, current_active_state, MIGRATION_STATUS_COMPLETED); return; fail: - migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); + migrate_set_state(s, current_active_state, MIGRATION_STATUS_FAILED); } -/* migration thread support */ - +/* + * Master migration thread on the source VM. + * It drives the migration and pumps the data down the outgoing channel. + */ static void *migration_thread(void *opaque) { MigrationState *s = opaque; + /* Used by the bandwidth calcs, updated later */ int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); int64_t initial_bytes = 0; int64_t max_size = 0; int64_t start_time = initial_time; + int64_t end_time; bool old_vm_running = false; + bool entered_postcopy = false; + /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */ + enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE; rcu_register_thread(); qemu_savevm_state_header(s->file); + + if (migrate_postcopy_ram()) { + /* Now tell the dest that it should open its end so it can reply */ + qemu_savevm_send_open_return_path(s->file); + + /* And do a ping that will make stuff easier to debug */ + qemu_savevm_send_ping(s->file, 1); + + /* + * Tell the destination that we *might* want to do postcopy later; + * if the other end can't do postcopy it should fail now, nice and + * early. + */ + qemu_savevm_send_postcopy_advise(s->file); + } + qemu_savevm_state_begin(s->file, &s->params); s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; + current_active_state = MIGRATION_STATUS_ACTIVE; migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); - while (s->state == MIGRATION_STATUS_ACTIVE) { + trace_migration_thread_setup_complete(); + + while (s->state == MIGRATION_STATUS_ACTIVE || + s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { int64_t current_time; uint64_t pending_size; if (!qemu_file_rate_limit(s->file)) { - pending_size = qemu_savevm_state_pending(s->file, max_size); - trace_migrate_pending(pending_size, max_size); + uint64_t pend_post, pend_nonpost; + + qemu_savevm_state_pending(s->file, max_size, &pend_nonpost, + &pend_post); + pending_size = pend_nonpost + pend_post; + trace_migrate_pending(pending_size, max_size, + pend_post, pend_nonpost); if (pending_size && pending_size >= max_size) { - qemu_savevm_state_iterate(s->file); + /* Still a significant amount to transfer */ + + if (migrate_postcopy_ram() && + s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE && + pend_nonpost <= max_size && + atomic_read(&s->start_postcopy)) { + + if (!postcopy_start(s, &old_vm_running)) { + current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE; + entered_postcopy = true; + } + + continue; + } + /* Just another iteration step */ + qemu_savevm_state_iterate(s->file, entered_postcopy); } else { trace_migration_thread_low_pending(pending_size); - migration_completion(s, &old_vm_running, &start_time); + migration_completion(s, current_active_state, + &old_vm_running, &start_time); break; } } if (qemu_file_get_error(s->file)) { - migrate_set_state(s, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_FAILED); + migrate_set_state(s, current_active_state, MIGRATION_STATUS_FAILED); + trace_migration_thread_file_err(); break; } current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); if (current_time >= initial_time + BUFFER_DELAY) { uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes; uint64_t time_spent = current_time - initial_time; - double bandwidth = transferred_bytes / time_spent; + double bandwidth = (double)transferred_bytes / time_spent; max_size = bandwidth * migrate_max_downtime() / 1000000; s->mbps = time_spent ? (((double) transferred_bytes * 8.0) / @@ -1087,22 +1698,26 @@ static void *migration_thread(void *opaque) } } + trace_migration_thread_after_loop(); /* If we enabled cpu throttling for auto-converge, turn it off. */ cpu_throttle_stop(); + end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_mutex_lock_iothread(); + qemu_savevm_state_cleanup(); if (s->state == MIGRATION_STATUS_COMPLETED) { - int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); uint64_t transferred_bytes = qemu_ftell(s->file); s->total_time = end_time - s->total_time; - s->downtime = end_time - start_time; + if (!entered_postcopy) { + s->downtime = end_time - start_time; + } if (s->total_time) { s->mbps = (((double) transferred_bytes * 8.0) / ((double) s->total_time)) / 1000; } runstate_set(RUN_STATE_POSTMIGRATE); } else { - if (old_vm_running) { + if (old_vm_running && !entered_postcopy) { vm_start(); } } @@ -1125,7 +1740,34 @@ void migrate_fd_connect(MigrationState *s) /* Notify before starting migration thread */ notifier_list_notify(&migration_state_notifiers, s); + /* + * Open the return path; currently for postcopy but other things might + * also want it. + */ + if (migrate_postcopy_ram()) { + if (open_return_path_on_source(s)) { + error_report("Unable to open return-path for postcopy"); + migrate_set_state(s, MIGRATION_STATUS_SETUP, + MIGRATION_STATUS_FAILED); + migrate_fd_cleanup(s); + return; + } + } + migrate_compress_threads_create(); qemu_thread_create(&s->thread, "migration", migration_thread, s, QEMU_THREAD_JOINABLE); + s->migration_thread_running = true; +} + +PostcopyState postcopy_state_get(void) +{ + return atomic_mb_read(&incoming_postcopy_state); } + +/* Set the state and return the old state */ +PostcopyState postcopy_state_set(PostcopyState new_state) +{ + return atomic_xchg(&incoming_postcopy_state, new_state); +} + diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c new file mode 100644 index 0000000000..3946aa98aa --- /dev/null +++ b/migration/postcopy-ram.c @@ -0,0 +1,761 @@ +/* + * Postcopy migration for RAM + * + * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Dave Gilbert <dgilbert@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* + * Postcopy is a migration technique where the execution flips from the + * source to the destination before all the data has been copied. + */ + +#include <glib.h> +#include <stdio.h> +#include <unistd.h> + +#include "qemu-common.h" +#include "migration/migration.h" +#include "migration/postcopy-ram.h" +#include "sysemu/sysemu.h" +#include "sysemu/balloon.h" +#include "qemu/error-report.h" +#include "trace.h" + +/* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes + */ +#define MAX_DISCARDS_PER_COMMAND 12 + +struct PostcopyDiscardState { + const char *ramblock_name; + uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */ + uint16_t cur_entry; + /* + * Start and length of a discard range (bytes) + */ + uint64_t start_list[MAX_DISCARDS_PER_COMMAND]; + uint64_t length_list[MAX_DISCARDS_PER_COMMAND]; + unsigned int nsentwords; + unsigned int nsentcmds; +}; + +/* Postcopy needs to detect accesses to pages that haven't yet been copied + * across, and efficiently map new pages in, the techniques for doing this + * are target OS specific. + */ +#if defined(__linux__) + +#include <poll.h> +#include <sys/eventfd.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <asm/types.h> /* for __u64 */ +#endif + +#if defined(__linux__) && defined(__NR_userfaultfd) +#include <linux/userfaultfd.h> + +static bool ufd_version_check(int ufd) +{ + struct uffdio_api api_struct; + uint64_t ioctl_mask; + + api_struct.api = UFFD_API; + api_struct.features = 0; + if (ioctl(ufd, UFFDIO_API, &api_struct)) { + error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s", + strerror(errno)); + return false; + } + + ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | + (__u64)1 << _UFFDIO_UNREGISTER; + if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) { + error_report("Missing userfault features: %" PRIx64, + (uint64_t)(~api_struct.ioctls & ioctl_mask)); + return false; + } + + return true; +} + +/* + * Note: This has the side effect of munlock'ing all of RAM, that's + * normally fine since if the postcopy succeeds it gets turned back on at the + * end. + */ +bool postcopy_ram_supported_by_host(void) +{ + long pagesize = getpagesize(); + int ufd = -1; + bool ret = false; /* Error unless we change it */ + void *testarea = NULL; + struct uffdio_register reg_struct; + struct uffdio_range range_struct; + uint64_t feature_mask; + + if ((1ul << qemu_target_page_bits()) > pagesize) { + error_report("Target page size bigger than host page size"); + goto out; + } + + ufd = syscall(__NR_userfaultfd, O_CLOEXEC); + if (ufd == -1) { + error_report("%s: userfaultfd not available: %s", __func__, + strerror(errno)); + goto out; + } + + /* Version and features check */ + if (!ufd_version_check(ufd)) { + goto out; + } + + /* + * userfault and mlock don't go together; we'll put it back later if + * it was enabled. + */ + if (munlockall()) { + error_report("%s: munlockall: %s", __func__, strerror(errno)); + return -1; + } + + /* + * We need to check that the ops we need are supported on anon memory + * To do that we need to register a chunk and see the flags that + * are returned. + */ + testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (testarea == MAP_FAILED) { + error_report("%s: Failed to map test area: %s", __func__, + strerror(errno)); + goto out; + } + g_assert(((size_t)testarea & (pagesize-1)) == 0); + + reg_struct.range.start = (uintptr_t)testarea; + reg_struct.range.len = pagesize; + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; + + if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) { + error_report("%s userfault register: %s", __func__, strerror(errno)); + goto out; + } + + range_struct.start = (uintptr_t)testarea; + range_struct.len = pagesize; + if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { + error_report("%s userfault unregister: %s", __func__, strerror(errno)); + goto out; + } + + feature_mask = (__u64)1 << _UFFDIO_WAKE | + (__u64)1 << _UFFDIO_COPY | + (__u64)1 << _UFFDIO_ZEROPAGE; + if ((reg_struct.ioctls & feature_mask) != feature_mask) { + error_report("Missing userfault map features: %" PRIx64, + (uint64_t)(~reg_struct.ioctls & feature_mask)); + goto out; + } + + /* Success! */ + ret = true; +out: + if (testarea) { + munmap(testarea, pagesize); + } + if (ufd != -1) { + close(ufd); + } + return ret; +} + +/** + * postcopy_ram_discard_range: Discard a range of memory. + * We can assume that if we've been called postcopy_ram_hosttest returned true. + * + * @mis: Current incoming migration state. + * @start, @length: range of memory to discard. + * + * returns: 0 on success. + */ +int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, + size_t length) +{ + trace_postcopy_ram_discard_range(start, length); + if (madvise(start, length, MADV_DONTNEED)) { + error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno)); + return -1; + } + + return 0; +} + +/* + * Setup an area of RAM so that it *can* be used for postcopy later; this + * must be done right at the start prior to pre-copy. + * opaque should be the MIS. + */ +static int init_range(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + + trace_postcopy_init_range(block_name, host_addr, offset, length); + + /* + * We need the whole of RAM to be truly empty for postcopy, so things + * like ROMs and any data tables built during init must be zero'd + * - we're going to get the copy from the source anyway. + * (Precopy will just overwrite this data, so doesn't need the discard) + */ + if (postcopy_ram_discard_range(mis, host_addr, length)) { + return -1; + } + + return 0; +} + +/* + * At the end of migration, undo the effects of init_range + * opaque should be the MIS. + */ +static int cleanup_range(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + struct uffdio_range range_struct; + trace_postcopy_cleanup_range(block_name, host_addr, offset, length); + + /* + * We turned off hugepage for the precopy stage with postcopy enabled + * we can turn it back on now. + */ + qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE); + + /* + * We can also turn off userfault now since we should have all the + * pages. It can be useful to leave it on to debug postcopy + * if you're not sure it's always getting every page. + */ + range_struct.start = (uintptr_t)host_addr; + range_struct.len = length; + + if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) { + error_report("%s: userfault unregister %s", __func__, strerror(errno)); + + return -1; + } + + return 0; +} + +/* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * called from arch_init's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + if (qemu_ram_foreach_block(init_range, mis)) { + return -1; + } + + return 0; +} + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + trace_postcopy_ram_incoming_cleanup_entry(); + + if (mis->have_fault_thread) { + uint64_t tmp64; + + if (qemu_ram_foreach_block(cleanup_range, mis)) { + return -1; + } + /* + * Tell the fault_thread to exit, it's an eventfd that should + * currently be at 0, we're going to increment it to 1 + */ + tmp64 = 1; + if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) { + trace_postcopy_ram_incoming_cleanup_join(); + qemu_thread_join(&mis->fault_thread); + } else { + /* Not much we can do here, but may as well report it */ + error_report("%s: incrementing userfault_quit_fd: %s", __func__, + strerror(errno)); + } + trace_postcopy_ram_incoming_cleanup_closeuf(); + close(mis->userfault_fd); + close(mis->userfault_quit_fd); + mis->have_fault_thread = false; + } + + qemu_balloon_inhibit(false); + + if (enable_mlock) { + if (os_mlock() < 0) { + error_report("mlock: %s", strerror(errno)); + /* + * It doesn't feel right to fail at this point, we have a valid + * VM state. + */ + } + } + + postcopy_state_set(POSTCOPY_INCOMING_END); + migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); + + if (mis->postcopy_tmp_page) { + munmap(mis->postcopy_tmp_page, getpagesize()); + mis->postcopy_tmp_page = NULL; + } + trace_postcopy_ram_incoming_cleanup_exit(); + return 0; +} + +/* + * Disable huge pages on an area + */ +static int nhp_range(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + trace_postcopy_nhp_range(block_name, host_addr, offset, length); + + /* + * Before we do discards we need to ensure those discards really + * do delete areas of the page, even if THP thinks a hugepage would + * be a good idea, so force hugepages off. + */ + qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE); + + return 0; +} + +/* + * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard + * however leaving it until after precopy means that most of the precopy + * data is still THPd + */ +int postcopy_ram_prepare_discard(MigrationIncomingState *mis) +{ + if (qemu_ram_foreach_block(nhp_range, mis)) { + return -1; + } + + postcopy_state_set(POSTCOPY_INCOMING_DISCARD); + + return 0; +} + +/* + * Mark the given area of RAM as requiring notification to unwritten areas + * Used as a callback on qemu_ram_foreach_block. + * host_addr: Base of area to mark + * offset: Offset in the whole ram arena + * length: Length of the section + * opaque: MigrationIncomingState pointer + * Returns 0 on success + */ +static int ram_block_enable_notify(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, + void *opaque) +{ + MigrationIncomingState *mis = opaque; + struct uffdio_register reg_struct; + + reg_struct.range.start = (uintptr_t)host_addr; + reg_struct.range.len = length; + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; + + /* Now tell our userfault_fd that it's responsible for this area */ + if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, ®_struct)) { + error_report("%s userfault register: %s", __func__, strerror(errno)); + return -1; + } + + return 0; +} + +/* + * Handle faults detected by the USERFAULT markings + */ +static void *postcopy_ram_fault_thread(void *opaque) +{ + MigrationIncomingState *mis = opaque; + struct uffd_msg msg; + int ret; + size_t hostpagesize = getpagesize(); + RAMBlock *rb = NULL; + RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ + + trace_postcopy_ram_fault_thread_entry(); + qemu_sem_post(&mis->fault_thread_sem); + + while (true) { + ram_addr_t rb_offset; + ram_addr_t in_raspace; + struct pollfd pfd[2]; + + /* + * We're mainly waiting for the kernel to give us a faulting HVA, + * however we can be told to quit via userfault_quit_fd which is + * an eventfd + */ + pfd[0].fd = mis->userfault_fd; + pfd[0].events = POLLIN; + pfd[0].revents = 0; + pfd[1].fd = mis->userfault_quit_fd; + pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ + pfd[1].revents = 0; + + if (poll(pfd, 2, -1 /* Wait forever */) == -1) { + error_report("%s: userfault poll: %s", __func__, strerror(errno)); + break; + } + + if (pfd[1].revents) { + trace_postcopy_ram_fault_thread_quit(); + break; + } + + ret = read(mis->userfault_fd, &msg, sizeof(msg)); + if (ret != sizeof(msg)) { + if (errno == EAGAIN) { + /* + * if a wake up happens on the other thread just after + * the poll, there is nothing to read. + */ + continue; + } + if (ret < 0) { + error_report("%s: Failed to read full userfault message: %s", + __func__, strerror(errno)); + break; + } else { + error_report("%s: Read %d bytes from userfaultfd expected %zd", + __func__, ret, sizeof(msg)); + break; /* Lost alignment, don't know what we'd read next */ + } + } + if (msg.event != UFFD_EVENT_PAGEFAULT) { + error_report("%s: Read unexpected event %ud from userfaultfd", + __func__, msg.event); + continue; /* It's not a page fault, shouldn't happen */ + } + + rb = qemu_ram_block_from_host( + (void *)(uintptr_t)msg.arg.pagefault.address, + true, &in_raspace, &rb_offset); + if (!rb) { + error_report("postcopy_ram_fault_thread: Fault outside guest: %" + PRIx64, (uint64_t)msg.arg.pagefault.address); + break; + } + + rb_offset &= ~(hostpagesize - 1); + trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, + qemu_ram_get_idstr(rb), + rb_offset); + + /* + * Send the request to the source - we want to request one + * of our host page sizes (which is >= TPS) + */ + if (rb != last_rb) { + last_rb = rb; + migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), + rb_offset, hostpagesize); + } else { + /* Save some space */ + migrate_send_rp_req_pages(mis, NULL, + rb_offset, hostpagesize); + } + } + trace_postcopy_ram_fault_thread_exit(); + return NULL; +} + +int postcopy_ram_enable_notify(MigrationIncomingState *mis) +{ + /* Open the fd for the kernel to give us userfaults */ + mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (mis->userfault_fd == -1) { + error_report("%s: Failed to open userfault fd: %s", __func__, + strerror(errno)); + return -1; + } + + /* + * Although the host check already tested the API, we need to + * do the check again as an ABI handshake on the new fd. + */ + if (!ufd_version_check(mis->userfault_fd)) { + return -1; + } + + /* Now an eventfd we use to tell the fault-thread to quit */ + mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC); + if (mis->userfault_quit_fd == -1) { + error_report("%s: Opening userfault_quit_fd: %s", __func__, + strerror(errno)); + close(mis->userfault_fd); + return -1; + } + + qemu_sem_init(&mis->fault_thread_sem, 0); + qemu_thread_create(&mis->fault_thread, "postcopy/fault", + postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE); + qemu_sem_wait(&mis->fault_thread_sem); + qemu_sem_destroy(&mis->fault_thread_sem); + mis->have_fault_thread = true; + + /* Mark so that we get notified of accesses to unwritten areas */ + if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) { + return -1; + } + + /* + * Ballooning can mark pages as absent while we're postcopying + * that would cause false userfaults. + */ + qemu_balloon_inhibit(true); + + trace_postcopy_ram_enable_notify(); + + return 0; +} + +/* + * Place a host page (from) at (host) atomically + * returns 0 on success + */ +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +{ + struct uffdio_copy copy_struct; + + copy_struct.dst = (uint64_t)(uintptr_t)host; + copy_struct.src = (uint64_t)(uintptr_t)from; + copy_struct.len = getpagesize(); + copy_struct.mode = 0; + + /* copy also acks to the kernel waking the stalled thread up + * TODO: We can inhibit that ack and only do it if it was requested + * which would be slightly cheaper, but we'd have to be careful + * of the order of updating our page state. + */ + if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) { + int e = errno; + error_report("%s: %s copy host: %p from: %p", + __func__, strerror(e), host, from); + + return -e; + } + + trace_postcopy_place_page(host); + return 0; +} + +/* + * Place a zero page at (host) atomically + * returns 0 on success + */ +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +{ + struct uffdio_zeropage zero_struct; + + zero_struct.range.start = (uint64_t)(uintptr_t)host; + zero_struct.range.len = getpagesize(); + zero_struct.mode = 0; + + if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { + int e = errno; + error_report("%s: %s zero host: %p", + __func__, strerror(e), host); + + return -e; + } + + trace_postcopy_place_page_zero(host); + return 0; +} + +/* + * Returns a target page of memory that can be mapped at a later point in time + * using postcopy_place_page + * The same address is used repeatedly, postcopy_place_page just takes the + * backing page away. + * Returns: Pointer to allocated page + * + */ +void *postcopy_get_tmp_page(MigrationIncomingState *mis) +{ + if (!mis->postcopy_tmp_page) { + mis->postcopy_tmp_page = mmap(NULL, getpagesize(), + PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (!mis->postcopy_tmp_page) { + error_report("%s: %s", __func__, strerror(errno)); + return NULL; + } + } + + return mis->postcopy_tmp_page; +} + +#else +/* No target OS support, stubs just fail */ +bool postcopy_ram_supported_by_host(void) +{ + error_report("%s: No OS support", __func__); + return false; +} + +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + error_report("postcopy_ram_incoming_init: No OS support"); + return -1; +} + +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + assert(0); + return -1; +} + +int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, + size_t length) +{ + assert(0); + return -1; +} + +int postcopy_ram_prepare_discard(MigrationIncomingState *mis) +{ + assert(0); + return -1; +} + +int postcopy_ram_enable_notify(MigrationIncomingState *mis) +{ + assert(0); + return -1; +} + +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +{ + assert(0); + return -1; +} + +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +{ + assert(0); + return -1; +} + +void *postcopy_get_tmp_page(MigrationIncomingState *mis) +{ + assert(0); + return NULL; +} + +#endif + +/* ------------------------------------------------------------------------- */ + +/** + * postcopy_discard_send_init: Called at the start of each RAMBlock before + * asking to discard individual ranges. + * + * @ms: The current migration state. + * @offset: the bitmap offset of the named RAMBlock in the migration + * bitmap. + * @name: RAMBlock that discards will operate on. + * + * returns: a new PDS. + */ +PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, + unsigned long offset, + const char *name) +{ + PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState)); + + if (res) { + res->ramblock_name = name; + res->offset = offset; + } + + return res; +} + +/** + * postcopy_discard_send_range: Called by the bitmap code for each chunk to + * discard. May send a discard message, may just leave it queued to + * be sent later. + * + * @ms: Current migration state. + * @pds: Structure initialised by postcopy_discard_send_init(). + * @start,@length: a range of pages in the migration bitmap in the + * RAM block passed to postcopy_discard_send_init() (length=1 is one page) + */ +void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, + unsigned long start, unsigned long length) +{ + size_t tp_bits = qemu_target_page_bits(); + /* Convert to byte offsets within the RAM block */ + pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits; + pds->length_list[pds->cur_entry] = length << tp_bits; + trace_postcopy_discard_send_range(pds->ramblock_name, start, length); + pds->cur_entry++; + pds->nsentwords++; + + if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) { + /* Full set, ship it! */ + qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name, + pds->cur_entry, + pds->start_list, + pds->length_list); + pds->nsentcmds++; + pds->cur_entry = 0; + } +} + +/** + * postcopy_discard_send_finish: Called at the end of each RAMBlock by the + * bitmap code. Sends any outstanding discard messages, frees the PDS + * + * @ms: Current migration state. + * @pds: Structure initialised by postcopy_discard_send_init(). + */ +void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds) +{ + /* Anything unsent? */ + if (pds->cur_entry) { + qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name, + pds->cur_entry, + pds->start_list, + pds->length_list); + pds->nsentcmds++; + } + + trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords, + pds->nsentcmds); + + g_free(pds); +} diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c index 809bf070d7..6ca53e7d67 100644 --- a/migration/qemu-file-unix.c +++ b/migration/qemu-file-unix.c @@ -22,6 +22,7 @@ * THE SOFTWARE. */ #include "qemu-common.h" +#include "qemu/error-report.h" #include "qemu/iov.h" #include "qemu/sockets.h" #include "qemu/coroutine.h" @@ -39,12 +40,44 @@ static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, QEMUFileSocket *s = opaque; ssize_t len; ssize_t size = iov_size(iov, iovcnt); + ssize_t offset = 0; + int err; - len = iov_send(s->fd, iov, iovcnt, 0, size); - if (len < size) { - len = -socket_error(); - } - return len; + while (size > 0) { + len = iov_send(s->fd, iov, iovcnt, offset, size); + + if (len > 0) { + size -= len; + offset += len; + } + + if (size > 0) { + err = socket_error(); + + if (err != EAGAIN && err != EWOULDBLOCK) { + error_report("socket_writev_buffer: Got err=%d for (%zu/%zu)", + err, (size_t)size, (size_t)len); + /* + * If I've already sent some but only just got the error, I + * could return the amount validly sent so far and wait for the + * next call to report the error, but I'd rather flag the error + * immediately. + */ + return -err; + } + + /* Emulate blocking */ + GPollFD pfd; + + pfd.fd = s->fd; + pfd.events = G_IO_OUT | G_IO_ERR; + pfd.revents = 0; + TFR(err = g_poll(&pfd, 1, -1 /* no timeout */)); + /* Errors other than EINTR intentionally ignored */ + } + } + + return offset; } static int socket_get_fd(void *opaque) @@ -97,6 +130,56 @@ static int socket_shutdown(void *opaque, bool rd, bool wr) } } +static int socket_return_close(void *opaque) +{ + QEMUFileSocket *s = opaque; + /* + * Note: We don't close the socket, that should be done by the forward + * path. + */ + g_free(s); + return 0; +} + +static const QEMUFileOps socket_return_read_ops = { + .get_fd = socket_get_fd, + .get_buffer = socket_get_buffer, + .close = socket_return_close, + .shut_down = socket_shutdown, +}; + +static const QEMUFileOps socket_return_write_ops = { + .get_fd = socket_get_fd, + .writev_buffer = socket_writev_buffer, + .close = socket_return_close, + .shut_down = socket_shutdown, +}; + +/* + * Give a QEMUFile* off the same socket but data in the opposite + * direction. + */ +static QEMUFile *socket_get_return_path(void *opaque) +{ + QEMUFileSocket *forward = opaque; + QEMUFileSocket *reverse; + + if (qemu_file_get_error(forward->file)) { + /* If the forward file is in error, don't try and open a return */ + return NULL; + } + + reverse = g_malloc0(sizeof(QEMUFileSocket)); + reverse->fd = forward->fd; + /* I don't think there's a better way to tell which direction 'this' is */ + if (forward->file->ops->get_buffer != NULL) { + /* being called from the read side, so we need to be able to write */ + return qemu_fopen_ops(reverse, &socket_return_write_ops); + } else { + return qemu_fopen_ops(reverse, &socket_return_read_ops); + } +} + static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, int64_t pos) { @@ -206,18 +289,19 @@ QEMUFile *qemu_fdopen(int fd, const char *mode) } static const QEMUFileOps socket_read_ops = { - .get_fd = socket_get_fd, - .get_buffer = socket_get_buffer, - .close = socket_close, - .shut_down = socket_shutdown - + .get_fd = socket_get_fd, + .get_buffer = socket_get_buffer, + .close = socket_close, + .shut_down = socket_shutdown, + .get_return_path = socket_get_return_path }; static const QEMUFileOps socket_write_ops = { - .get_fd = socket_get_fd, - .writev_buffer = socket_writev_buffer, - .close = socket_close, - .shut_down = socket_shutdown + .get_fd = socket_get_fd, + .writev_buffer = socket_writev_buffer, + .close = socket_close, + .shut_down = socket_shutdown, + .get_return_path = socket_get_return_path }; QEMUFile *qemu_fopen_socket(int fd, const char *mode) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index df49023ed8..0bbd2574a8 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -44,6 +44,18 @@ int qemu_file_shutdown(QEMUFile *f) return f->ops->shut_down(f->opaque, true, true); } +/* + * Result: QEMUFile* for a 'return path' for comms in the opposite direction + * NULL if not available + */ +QEMUFile *qemu_file_get_return_path(QEMUFile *f) +{ + if (!f->ops->get_return_path) { + return NULL; + } + return f->ops->get_return_path(f->opaque); +} + bool qemu_file_mode_is_not_valid(const char *mode) { if (mode == NULL || @@ -434,6 +446,43 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) } /* + * Read 'size' bytes of data from the file. + * 'size' can be larger than the internal buffer. + * + * The data: + * may be held on an internal buffer (in which case *buf is updated + * to point to it) that is valid until the next qemu_file operation. + * OR + * will be copied to the *buf that was passed in. + * + * The code tries to avoid the copy if possible. + * + * It will return size bytes unless there was an error, in which case it will + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + * + * Note: Since **buf may get changed, the caller should take care to + * keep a pointer to the original buffer if it needs to deallocate it. + */ +size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) +{ + if (size < IO_BUF_SIZE) { + size_t res; + uint8_t *src; + + res = qemu_peek_buffer(f, &src, size, 0); + + if (res == size) { + qemu_file_skip(f, res); + *buf = src; + return res; + } + } + + return qemu_get_buffer(f, *buf, size); +} + +/* * Peeks a single byte from the buffer; this isn't guaranteed to work if * offset leaves a gap after the previous read/peeked data. */ @@ -611,3 +660,18 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) return res == len ? res : 0; } + +/* + * Set the blocking state of the QEMUFile. + * Note: On some transports the OS only keeps a single blocking state for + * both directions, and thus changing the blocking on the main + * QEMUFile can also affect the return path. + */ +void qemu_file_set_blocking(QEMUFile *f, bool block) +{ + if (block) { + qemu_set_block(qemu_get_fd(f)); + } else { + qemu_set_nonblock(qemu_get_fd(f)); + } +} diff --git a/migration/ram.c b/migration/ram.c index a25bcc7db1..1eb155aedd 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -32,6 +32,7 @@ #include "qemu/timer.h" #include "qemu/main-loop.h" #include "migration/migration.h" +#include "migration/postcopy-ram.h" #include "exec/address-spaces.h" #include "migration/page_cache.h" #include "qemu/error-report.h" @@ -237,7 +238,14 @@ typedef struct PageSearchStatus PageSearchStatus; static struct BitmapRcu { struct rcu_head rcu; + /* Main migration bitmap */ unsigned long *bmap; + /* bitmap of pages that haven't been sent even once + * only maintained and used in postcopy at the moment + * where it's used to send the dirtymap at the start + * of the postcopy phase + */ + unsigned long *unsentmap; } *migration_bitmap_rcu; struct CompressParam { @@ -531,10 +539,18 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, return 1; } -/* Called with rcu_read_lock() to protect migration_bitmap */ +/* Called with rcu_read_lock() to protect migration_bitmap + * rb: The RAMBlock to search for dirty pages in + * start: Start address (typically so we can continue from previous page) + * ram_addr_abs: Pointer into which to store the address of the dirty page + * within the global ram_addr space + * + * Returns: byte offset within memory region of the start of a dirty page + */ static inline -ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, - ram_addr_t start) +ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb, + ram_addr_t start, + ram_addr_t *ram_addr_abs) { unsigned long base = rb->offset >> TARGET_PAGE_BITS; unsigned long nr = base + (start >> TARGET_PAGE_BITS); @@ -551,14 +567,24 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, next = find_next_bit(bitmap, size, nr); } - if (next < size) { - clear_bit(next, bitmap); + *ram_addr_abs = next << TARGET_PAGE_BITS; + return (next - base) << TARGET_PAGE_BITS; +} + +static inline bool migration_bitmap_clear_dirty(ram_addr_t addr) +{ + bool ret; + int nr = addr >> TARGET_PAGE_BITS; + unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + + ret = test_and_clear_bit(nr, bitmap); + + if (ret) { migration_dirty_pages--; } - return (next - base) << TARGET_PAGE_BITS; + return ret; } -/* Called with rcu_read_lock() to protect migration_bitmap */ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) { unsigned long *bitmap; @@ -951,12 +977,14 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, * @f: Current migration stream. * @pss: Data about the state of the current dirty page scan. * @*again: Set to false if the search has scanned the whole of RAM + * *ram_addr_abs: Pointer into which to store the address of the dirty page + * within the global ram_addr space */ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, - bool *again) + bool *again, ram_addr_t *ram_addr_abs) { - pss->offset = migration_bitmap_find_and_reset_dirty(pss->block, - pss->offset); + pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset, + ram_addr_abs); if (pss->complete_round && pss->block == last_seen_block && pss->offset >= last_offset) { /* @@ -995,6 +1023,278 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, } } +/* + * Helper for 'get_queued_page' - gets a page off the queue + * ms: MigrationState in + * *offset: Used to return the offset within the RAMBlock + * ram_addr_abs: global offset in the dirty/sent bitmaps + * + * Returns: block (or NULL if none available) + */ +static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset, + ram_addr_t *ram_addr_abs) +{ + RAMBlock *block = NULL; + + qemu_mutex_lock(&ms->src_page_req_mutex); + if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) { + struct MigrationSrcPageRequest *entry = + QSIMPLEQ_FIRST(&ms->src_page_requests); + block = entry->rb; + *offset = entry->offset; + *ram_addr_abs = (entry->offset + entry->rb->offset) & + TARGET_PAGE_MASK; + + if (entry->len > TARGET_PAGE_SIZE) { + entry->len -= TARGET_PAGE_SIZE; + entry->offset += TARGET_PAGE_SIZE; + } else { + memory_region_unref(block->mr); + QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + g_free(entry); + } + } + qemu_mutex_unlock(&ms->src_page_req_mutex); + + return block; +} + +/* + * Unqueue a page from the queue fed by postcopy page requests; skips pages + * that are already sent (!dirty) + * + * ms: MigrationState in + * pss: PageSearchStatus structure updated with found block/offset + * ram_addr_abs: global offset in the dirty/sent bitmaps + * + * Returns: true if a queued page is found + */ +static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, + ram_addr_t *ram_addr_abs) +{ + RAMBlock *block; + ram_addr_t offset; + bool dirty; + + do { + block = unqueue_page(ms, &offset, ram_addr_abs); + /* + * We're sending this page, and since it's postcopy nothing else + * will dirty it, and we must make sure it doesn't get sent again + * even if this queue request was received after the background + * search already sent it. + */ + if (block) { + unsigned long *bitmap; + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap); + if (!dirty) { + trace_get_queued_page_not_dirty( + block->idstr, (uint64_t)offset, + (uint64_t)*ram_addr_abs, + test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, + atomic_rcu_read(&migration_bitmap_rcu)->unsentmap)); + } else { + trace_get_queued_page(block->idstr, + (uint64_t)offset, + (uint64_t)*ram_addr_abs); + } + } + + } while (block && !dirty); + + if (block) { + /* + * As soon as we start servicing pages out of order, then we have + * to kill the bulk stage, since the bulk stage assumes + * in (migration_bitmap_find_and_reset_dirty) that every page is + * dirty, that's no longer true. + */ + ram_bulk_stage = false; + + /* + * We want the background search to continue from the queued page + * since the guest is likely to want other pages near to the page + * it just requested. + */ + pss->block = block; + pss->offset = offset; + } + + return !!block; +} + +/** + * flush_page_queue: Flush any remaining pages in the ram request queue + * it should be empty at the end anyway, but in error cases there may be + * some left. + * + * ms: MigrationState + */ +void flush_page_queue(MigrationState *ms) +{ + struct MigrationSrcPageRequest *mspr, *next_mspr; + /* This queue generally should be empty - but in the case of a failed + * migration might have some droppings in. + */ + rcu_read_lock(); + QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) { + memory_region_unref(mspr->rb->mr); + QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + g_free(mspr); + } + rcu_read_unlock(); +} + +/** + * Queue the pages for transmission, e.g. a request from postcopy destination + * ms: MigrationStatus in which the queue is held + * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last) + * start: Offset from the start of the RAMBlock + * len: Length (in bytes) to send + * Return: 0 on success + */ +int ram_save_queue_pages(MigrationState *ms, const char *rbname, + ram_addr_t start, ram_addr_t len) +{ + RAMBlock *ramblock; + + rcu_read_lock(); + if (!rbname) { + /* Reuse last RAMBlock */ + ramblock = ms->last_req_rb; + + if (!ramblock) { + /* + * Shouldn't happen, we can't reuse the last RAMBlock if + * it's the 1st request. + */ + error_report("ram_save_queue_pages no previous block"); + goto err; + } + } else { + ramblock = qemu_ram_block_by_name(rbname); + + if (!ramblock) { + /* We shouldn't be asked for a non-existent RAMBlock */ + error_report("ram_save_queue_pages no block '%s'", rbname); + goto err; + } + ms->last_req_rb = ramblock; + } + trace_ram_save_queue_pages(ramblock->idstr, start, len); + if (start+len > ramblock->used_length) { + error_report("%s request overrun start=" RAM_ADDR_FMT " len=" + RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, + __func__, start, len, ramblock->used_length); + goto err; + } + + struct MigrationSrcPageRequest *new_entry = + g_malloc0(sizeof(struct MigrationSrcPageRequest)); + new_entry->rb = ramblock; + new_entry->offset = start; + new_entry->len = len; + + memory_region_ref(ramblock->mr); + qemu_mutex_lock(&ms->src_page_req_mutex); + QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req); + qemu_mutex_unlock(&ms->src_page_req_mutex); + rcu_read_unlock(); + + return 0; + +err: + rcu_read_unlock(); + return -1; +} + +/** + * ram_save_target_page: Save one target page + * + * + * @f: QEMUFile where to send the data + * @block: pointer to block that contains the page we want to send + * @offset: offset inside the block for the page; + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space + * + * Returns: Number of pages written. + */ +static int ram_save_target_page(MigrationState *ms, QEMUFile *f, + RAMBlock *block, ram_addr_t offset, + bool last_stage, + uint64_t *bytes_transferred, + ram_addr_t dirty_ram_abs) +{ + int res = 0; + + /* Check the pages is dirty and if it is send it */ + if (migration_bitmap_clear_dirty(dirty_ram_abs)) { + unsigned long *unsentmap; + if (compression_switch && migrate_use_compression()) { + res = ram_save_compressed_page(f, block, offset, + last_stage, + bytes_transferred); + } else { + res = ram_save_page(f, block, offset, last_stage, + bytes_transferred); + } + + if (res < 0) { + return res; + } + unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + if (unsentmap) { + clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap); + } + last_sent_block = block; + } + + return res; +} + +/** + * ram_save_host_page: Starting at *offset send pages upto the end + * of the current host page. It's valid for the initial + * offset to point into the middle of a host page + * in which case the remainder of the hostpage is sent. + * Only dirty target pages are sent. + * + * Returns: Number of pages written. + * + * @f: QEMUFile where to send the data + * @block: pointer to block that contains the page we want to send + * @offset: offset inside the block for the page; updated to last target page + * sent + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space + */ +static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock *block, + ram_addr_t *offset, bool last_stage, + uint64_t *bytes_transferred, + ram_addr_t dirty_ram_abs) +{ + int tmppages, pages = 0; + do { + tmppages = ram_save_target_page(ms, f, block, *offset, last_stage, + bytes_transferred, dirty_ram_abs); + if (tmppages < 0) { + return tmppages; + } + + pages += tmppages; + *offset += TARGET_PAGE_SIZE; + dirty_ram_abs += TARGET_PAGE_SIZE; + } while (*offset & (qemu_host_page_size - 1)); + + /* The offset we leave with is the last one we looked at */ + *offset -= TARGET_PAGE_SIZE; + return pages; +} + /** * ram_find_and_save_block: Finds a dirty page and sends it to f * @@ -1006,14 +1306,20 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, * @f: QEMUFile where to send the data * @last_stage: if we are at the completion stage * @bytes_transferred: increase it with the number of transferred bytes + * + * On systems where host-page-size > target-page-size it will send all the + * pages in a host page that are dirty. */ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, uint64_t *bytes_transferred) { PageSearchStatus pss; + MigrationState *ms = migrate_get_current(); int pages = 0; bool again, found; + ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in + ram_addr_t space */ pss.block = last_seen_block; pss.offset = last_offset; @@ -1024,22 +1330,18 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, } do { - found = find_dirty_block(f, &pss, &again); + again = true; + found = get_queued_page(ms, &pss, &dirty_ram_abs); - if (found) { - if (compression_switch && migrate_use_compression()) { - pages = ram_save_compressed_page(f, pss.block, pss.offset, - last_stage, - bytes_transferred); - } else { - pages = ram_save_page(f, pss.block, pss.offset, last_stage, - bytes_transferred); - } + if (!found) { + /* priority queue empty, so just search for something dirty */ + found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); + } - /* if page is unmodified, continue to the next */ - if (pages > 0) { - last_sent_block = pss.block; - } + if (found) { + pages = ram_save_host_page(ms, f, pss.block, &pss.offset, + last_stage, bytes_transferred, + dirty_ram_abs); } } while (!pages && again); @@ -1097,10 +1399,11 @@ void free_xbzrle_decoded_buf(void) static void migration_bitmap_free(struct BitmapRcu *bmap) { g_free(bmap->bmap); + g_free(bmap->unsentmap); g_free(bmap); } -static void migration_end(void) +static void ram_migration_cleanup(void *opaque) { /* caller have hold iothread lock or is in a bh, so there is * no writing race against this migration_bitmap @@ -1124,11 +1427,6 @@ static void migration_end(void) XBZRLE_cache_unlock(); } -static void ram_migration_cancel(void *opaque) -{ - migration_end(); -} - static void reset_ram_globals(void) { last_seen_block = NULL; @@ -1158,6 +1456,13 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) qemu_mutex_lock(&migration_bitmap_mutex); bitmap_copy(bitmap->bmap, old_bitmap->bmap, old); bitmap_set(bitmap->bmap, old, new - old); + + /* We don't have a way to safely extend the sentmap + * with RCU; so mark it as missing, entry to postcopy + * will fail. + */ + bitmap->unsentmap = NULL; + atomic_rcu_set(&migration_bitmap_rcu, bitmap); qemu_mutex_unlock(&migration_bitmap_mutex); migration_dirty_pages += new - old; @@ -1165,6 +1470,394 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) } } +/* + * 'expected' is the value you expect the bitmap mostly to be full + * of; it won't bother printing lines that are all this value. + * If 'todump' is null the migration bitmap is dumped. + */ +void ram_debug_dump_bitmap(unsigned long *todump, bool expected) +{ + int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + int64_t cur; + int64_t linelen = 128; + char linebuf[129]; + + if (!todump) { + todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + } + + for (cur = 0; cur < ram_pages; cur += linelen) { + int64_t curb; + bool found = false; + /* + * Last line; catch the case where the line length + * is longer than remaining ram + */ + if (cur + linelen > ram_pages) { + linelen = ram_pages - cur; + } + for (curb = 0; curb < linelen; curb++) { + bool thisbit = test_bit(cur + curb, todump); + linebuf[curb] = thisbit ? '1' : '.'; + found = found || (thisbit != expected); + } + if (found) { + linebuf[curb] = '\0'; + fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf); + } + } +} + +/* **** functions for postcopy ***** */ + +/* + * Callback from postcopy_each_ram_send_discard for each RAMBlock + * Note: At this point the 'unsentmap' is the processed bitmap combined + * with the dirtymap; so a '1' means it's either dirty or unsent. + * start,length: Indexes into the bitmap for the first bit + * representing the named block and length in target-pages + */ +static int postcopy_send_discard_bm_ram(MigrationState *ms, + PostcopyDiscardState *pds, + unsigned long start, + unsigned long length) +{ + unsigned long end = start + length; /* one after the end */ + unsigned long current; + unsigned long *unsentmap; + + unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + for (current = start; current < end; ) { + unsigned long one = find_next_bit(unsentmap, end, current); + + if (one <= end) { + unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1); + unsigned long discard_length; + + if (zero >= end) { + discard_length = end - one; + } else { + discard_length = zero - one; + } + postcopy_discard_send_range(ms, pds, one, discard_length); + current = one + discard_length; + } else { + current = one; + } + } + + return 0; +} + +/* + * Utility for the outgoing postcopy code. + * Calls postcopy_send_discard_bm_ram for each RAMBlock + * passing it bitmap indexes and name. + * Returns: 0 on success + * (qemu_ram_foreach_block ends up passing unscaled lengths + * which would mean postcopy code would have to deal with target page) + */ +static int postcopy_each_ram_send_discard(MigrationState *ms) +{ + struct RAMBlock *block; + int ret; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + unsigned long first = block->offset >> TARGET_PAGE_BITS; + PostcopyDiscardState *pds = postcopy_discard_send_init(ms, + first, + block->idstr); + + /* + * Postcopy sends chunks of bitmap over the wire, but it + * just needs indexes at this point, avoids it having + * target page specific code. + */ + ret = postcopy_send_discard_bm_ram(ms, pds, first, + block->used_length >> TARGET_PAGE_BITS); + postcopy_discard_send_finish(ms, pds); + if (ret) { + return ret; + } + } + + return 0; +} + +/* + * Helper for postcopy_chunk_hostpages; it's called twice to cleanup + * the two bitmaps, that are similar, but one is inverted. + * + * We search for runs of target-pages that don't start or end on a + * host page boundary; + * unsent_pass=true: Cleans up partially unsent host pages by searching + * the unsentmap + * unsent_pass=false: Cleans up partially dirty host pages by searching + * the main migration bitmap + * + */ +static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, + RAMBlock *block, + PostcopyDiscardState *pds) +{ + unsigned long *bitmap; + unsigned long *unsentmap; + unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE; + unsigned long first = block->offset >> TARGET_PAGE_BITS; + unsigned long len = block->used_length >> TARGET_PAGE_BITS; + unsigned long last = first + (len - 1); + unsigned long run_start; + + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + + if (unsent_pass) { + /* Find a sent page */ + run_start = find_next_zero_bit(unsentmap, last + 1, first); + } else { + /* Find a dirty page */ + run_start = find_next_bit(bitmap, last + 1, first); + } + + while (run_start <= last) { + bool do_fixup = false; + unsigned long fixup_start_addr; + unsigned long host_offset; + + /* + * If the start of this run of pages is in the middle of a host + * page, then we need to fixup this host page. + */ + host_offset = run_start % host_ratio; + if (host_offset) { + do_fixup = true; + run_start -= host_offset; + fixup_start_addr = run_start; + /* For the next pass */ + run_start = run_start + host_ratio; + } else { + /* Find the end of this run */ + unsigned long run_end; + if (unsent_pass) { + run_end = find_next_bit(unsentmap, last + 1, run_start + 1); + } else { + run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1); + } + /* + * If the end isn't at the start of a host page, then the + * run doesn't finish at the end of a host page + * and we need to discard. + */ + host_offset = run_end % host_ratio; + if (host_offset) { + do_fixup = true; + fixup_start_addr = run_end - host_offset; + /* + * This host page has gone, the next loop iteration starts + * from after the fixup + */ + run_start = fixup_start_addr + host_ratio; + } else { + /* + * No discards on this iteration, next loop starts from + * next sent/dirty page + */ + run_start = run_end + 1; + } + } + + if (do_fixup) { + unsigned long page; + + /* Tell the destination to discard this page */ + if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) { + /* For the unsent_pass we: + * discard partially sent pages + * For the !unsent_pass (dirty) we: + * discard partially dirty pages that were sent + * (any partially sent pages were already discarded + * by the previous unsent_pass) + */ + postcopy_discard_send_range(ms, pds, fixup_start_addr, + host_ratio); + } + + /* Clean up the bitmap */ + for (page = fixup_start_addr; + page < fixup_start_addr + host_ratio; page++) { + /* All pages in this host page are now not sent */ + set_bit(page, unsentmap); + + /* + * Remark them as dirty, updating the count for any pages + * that weren't previously dirty. + */ + migration_dirty_pages += !test_and_set_bit(page, bitmap); + } + } + + if (unsent_pass) { + /* Find the next sent page for the next iteration */ + run_start = find_next_zero_bit(unsentmap, last + 1, + run_start); + } else { + /* Find the next dirty page for the next iteration */ + run_start = find_next_bit(bitmap, last + 1, run_start); + } + } +} + +/* + * Utility for the outgoing postcopy code. + * + * Discard any partially sent host-page size chunks, mark any partially + * dirty host-page size chunks as all dirty. + * + * Returns: 0 on success + */ +static int postcopy_chunk_hostpages(MigrationState *ms) +{ + struct RAMBlock *block; + + if (qemu_host_page_size == TARGET_PAGE_SIZE) { + /* Easy case - TPS==HPS - nothing to be done */ + return 0; + } + + /* Easiest way to make sure we don't resume in the middle of a host-page */ + last_seen_block = NULL; + last_sent_block = NULL; + last_offset = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + unsigned long first = block->offset >> TARGET_PAGE_BITS; + + PostcopyDiscardState *pds = + postcopy_discard_send_init(ms, first, block->idstr); + + /* First pass: Discard all partially sent host pages */ + postcopy_chunk_hostpages_pass(ms, true, block, pds); + /* + * Second pass: Ensure that all partially dirty host pages are made + * fully dirty. + */ + postcopy_chunk_hostpages_pass(ms, false, block, pds); + + postcopy_discard_send_finish(ms, pds); + } /* ram_list loop */ + + return 0; +} + +/* + * Transmit the set of pages to be discarded after precopy to the target + * these are pages that: + * a) Have been previously transmitted but are now dirty again + * b) Pages that have never been transmitted, this ensures that + * any pages on the destination that have been mapped by background + * tasks get discarded (transparent huge pages is the specific concern) + * Hopefully this is pretty sparse + */ +int ram_postcopy_send_discard_bitmap(MigrationState *ms) +{ + int ret; + unsigned long *bitmap, *unsentmap; + + rcu_read_lock(); + + /* This should be our last sync, the src is now paused */ + migration_bitmap_sync(); + + unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + if (!unsentmap) { + /* We don't have a safe way to resize the sentmap, so + * if the bitmap was resized it will be NULL at this + * point. + */ + error_report("migration ram resized during precopy phase"); + rcu_read_unlock(); + return -EINVAL; + } + + /* Deal with TPS != HPS */ + ret = postcopy_chunk_hostpages(ms); + if (ret) { + rcu_read_unlock(); + return ret; + } + + /* + * Update the unsentmap to be unsentmap = unsentmap | dirty + */ + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + bitmap_or(unsentmap, unsentmap, bitmap, + last_ram_offset() >> TARGET_PAGE_BITS); + + + trace_ram_postcopy_send_discard_bitmap(); +#ifdef DEBUG_POSTCOPY + ram_debug_dump_bitmap(unsentmap, true); +#endif + + ret = postcopy_each_ram_send_discard(ms); + rcu_read_unlock(); + + return ret; +} + +/* + * At the start of the postcopy phase of migration, any now-dirty + * precopied pages are discarded. + * + * start, length describe a byte address range within the RAMBlock + * + * Returns 0 on success. + */ +int ram_discard_range(MigrationIncomingState *mis, + const char *block_name, + uint64_t start, size_t length) +{ + int ret = -1; + + rcu_read_lock(); + RAMBlock *rb = qemu_ram_block_by_name(block_name); + + if (!rb) { + error_report("ram_discard_range: Failed to find block '%s'", + block_name); + goto err; + } + + uint8_t *host_startaddr = rb->host + start; + + if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) { + error_report("ram_discard_range: Unaligned start address: %p", + host_startaddr); + goto err; + } + + if ((start + length) <= rb->used_length) { + uint8_t *host_endaddr = host_startaddr + length; + if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) { + error_report("ram_discard_range: Unaligned end address: %p", + host_endaddr); + goto err; + } + ret = postcopy_ram_discard_range(mis, host_startaddr, length); + } else { + error_report("ram_discard_range: Overrun block '%s' (%" PRIu64 + "/%zx/" RAM_ADDR_FMT")", + block_name, start, length, rb->used_length); + } + +err: + rcu_read_unlock(); + + return ret; +} + + /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code * start to become numerous it will be necessary to reduce the @@ -1219,10 +1912,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque) reset_ram_globals(); ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; - migration_bitmap_rcu = g_new(struct BitmapRcu, 1); + migration_bitmap_rcu = g_new0(struct BitmapRcu, 1); migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages); bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages); + if (migrate_postcopy_ram()) { + migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages); + bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages); + } + /* * Count the total number of pages used by ram blocks not including any * gaps due to alignment or unplugs. @@ -1322,7 +2020,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) { rcu_read_lock(); - migration_bitmap_sync(); + if (!migration_in_postcopy(migrate_get_current())) { + migration_bitmap_sync(); + } ram_control_before_iterate(f, RAM_CONTROL_FINISH); @@ -1344,19 +2044,21 @@ static int ram_save_complete(QEMUFile *f, void *opaque) rcu_read_unlock(); - migration_end(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); return 0; } -static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, + uint64_t *non_postcopiable_pending, + uint64_t *postcopiable_pending) { uint64_t remaining_size; remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; - if (remaining_size < max_size) { + if (!migration_in_postcopy(migrate_get_current()) && + remaining_size < max_size) { qemu_mutex_lock_iothread(); rcu_read_lock(); migration_bitmap_sync(); @@ -1364,7 +2066,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) qemu_mutex_unlock_iothread(); remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; } - return remaining_size; + + /* We can do postcopy, and all the data is postcopiable */ + *postcopiable_pending += remaining_size; } static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) @@ -1405,6 +2109,14 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) /* Must be called from within a rcu critical section. * Returns a pointer from within the RCU-protected ram_list. */ +/* + * Read a RAMBlock ID from the stream f, find the host address of the + * start of that block and add on 'offset' + * + * f: Stream to read from + * offset: Offset within the block + * flags: Page flags (mostly to see if it's a continuation of previous block) + */ static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -1426,14 +2138,12 @@ static inline void *host_from_stream_offset(QEMUFile *f, qemu_get_buffer(f, (uint8_t *)id, len); id[len] = 0; - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id)) && - block->max_length > offset) { - return block->host + offset; - } + block = qemu_ram_block_by_name(id); + if (block && block->max_length > offset) { + return block->host + offset; } - error_report("Can't find block %s!", id); + error_report("Can't find block %s", id); return NULL; } @@ -1541,11 +2251,148 @@ static void decompress_data_with_multi_threads(uint8_t *compbuf, } } +/* + * Allocate data structures etc needed by incoming migration with postcopy-ram + * postcopy-ram's similarly names postcopy_ram_incoming_init does the work + */ +int ram_postcopy_incoming_init(MigrationIncomingState *mis) +{ + size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + return postcopy_ram_incoming_init(mis, ram_pages); +} + +/* + * Called in postcopy mode by ram_load(). + * rcu_read_lock is taken prior to this being called. + */ +static int ram_load_postcopy(QEMUFile *f) +{ + int flags = 0, ret = 0; + bool place_needed = false; + bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE; + MigrationIncomingState *mis = migration_incoming_get_current(); + /* Temporary page that is later 'placed' */ + void *postcopy_host_page = postcopy_get_tmp_page(mis); + void *last_host = NULL; + bool all_zero = false; + + while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { + ram_addr_t addr; + void *host = NULL; + void *page_buffer = NULL; + void *place_source = NULL; + uint8_t ch; + + addr = qemu_get_be64(f); + flags = addr & ~TARGET_PAGE_MASK; + addr &= TARGET_PAGE_MASK; + + trace_ram_load_postcopy_loop((uint64_t)addr, flags); + place_needed = false; + if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) { + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + page_buffer = host; + /* + * Postcopy requires that we place whole host pages atomically. + * To make it atomic, the data is read into a temporary page + * that's moved into place later. + * The migration protocol uses, possibly smaller, target-pages + * however the source ensures it always sends all the components + * of a host page in order. + */ + page_buffer = postcopy_host_page + + ((uintptr_t)host & ~qemu_host_page_mask); + /* If all TP are zero then we can optimise the place */ + if (!((uintptr_t)host & ~qemu_host_page_mask)) { + all_zero = true; + } else { + /* not the 1st TP within the HP */ + if (host != (last_host + TARGET_PAGE_SIZE)) { + error_report("Non-sequential target page %p/%p\n", + host, last_host); + ret = -EINVAL; + break; + } + } + + + /* + * If it's the last part of a host page then we place the host + * page + */ + place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & + ~qemu_host_page_mask) == 0; + place_source = postcopy_host_page; + } + last_host = host; + + switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { + case RAM_SAVE_FLAG_COMPRESS: + ch = qemu_get_byte(f); + memset(page_buffer, ch, TARGET_PAGE_SIZE); + if (ch) { + all_zero = false; + } + break; + + case RAM_SAVE_FLAG_PAGE: + all_zero = false; + if (!place_needed || !matching_page_sizes) { + qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); + } else { + /* Avoids the qemu_file copy during postcopy, which is + * going to do a copy later; can only do it when we + * do this read in one go (matching page sizes) + */ + qemu_get_buffer_in_place(f, (uint8_t **)&place_source, + TARGET_PAGE_SIZE); + } + break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ + break; + default: + error_report("Unknown combination of migration flags: %#x" + " (postcopy mode)", flags); + ret = -EINVAL; + } + + if (place_needed) { + /* This gets called at the last target page in the host page */ + if (all_zero) { + ret = postcopy_place_page_zero(mis, + host + TARGET_PAGE_SIZE - + qemu_host_page_size); + } else { + ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE - + qemu_host_page_size, + place_source); + } + } + if (!ret) { + ret = qemu_file_get_error(f); + } + } + + return ret; +} + static int ram_load(QEMUFile *f, void *opaque, int version_id) { int flags = 0, ret = 0; static uint64_t seq_iter; int len = 0; + /* + * If system is running in postcopy mode, page inserts to host memory must + * be atomic + */ + bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING; seq_iter++; @@ -1559,15 +2406,30 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) * critical section. */ rcu_read_lock(); - while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { + + if (postcopy_running) { + ret = ram_load_postcopy(f); + } + + while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr, total_ram_bytes; - void *host; + void *host = NULL; uint8_t ch; addr = qemu_get_be64(f); flags = addr & ~TARGET_PAGE_MASK; addr &= TARGET_PAGE_MASK; + if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE | + RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + } + switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { case RAM_SAVE_FLAG_MEM_SIZE: /* Synchronize RAM block list */ @@ -1582,23 +2444,20 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) id[len] = 0; length = qemu_get_be64(f); - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id))) { - if (length != block->used_length) { - Error *local_err = NULL; + block = qemu_ram_block_by_name(id); + if (block) { + if (length != block->used_length) { + Error *local_err = NULL; - ret = qemu_ram_resize(block->offset, length, &local_err); - if (local_err) { - error_report_err(local_err); - } + ret = qemu_ram_resize(block->offset, length, + &local_err); + if (local_err) { + error_report_err(local_err); } - ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, - block->idstr); - break; } - } - - if (!block) { + ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, + block->idstr); + } else { error_report("Unknown ramblock \"%s\", cannot " "accept migration", id); ret = -EINVAL; @@ -1607,33 +2466,17 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) total_ram_bytes -= length; } break; + case RAM_SAVE_FLAG_COMPRESS: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } ch = qemu_get_byte(f); ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); break; + case RAM_SAVE_FLAG_PAGE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } qemu_get_buffer(f, host, TARGET_PAGE_SIZE); break; - case RAM_SAVE_FLAG_COMPRESS_PAGE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Invalid RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } + case RAM_SAVE_FLAG_COMPRESS_PAGE: len = qemu_get_be32(f); if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { error_report("Invalid compressed data length: %d", len); @@ -1643,13 +2486,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) qemu_get_buffer(f, compressed_data_buf, len); decompress_data_with_multi_threads(compressed_data_buf, host, len); break; + case RAM_SAVE_FLAG_XBZRLE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } if (load_xbzrle(f, addr, host) < 0) { error_report("Failed to decompress XBZRLE page at " RAM_ADDR_FMT, addr); @@ -1683,10 +2521,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) static SaveVMHandlers savevm_ram_handlers = { .save_live_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, - .save_live_complete = ram_save_complete, + .save_live_complete_postcopy = ram_save_complete, + .save_live_complete_precopy = ram_save_complete, .save_live_pending = ram_save_pending, .load_state = ram_load, - .cancel = ram_migration_cancel, + .cleanup = ram_migration_cleanup, }; void ram_mig_init(void) diff --git a/migration/rdma.c b/migration/rdma.c index 553fbd7503..dcabb91005 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -577,7 +577,7 @@ static int rdma_add_block(RDMAContext *rdma, const char *block_name, block->is_ram_block = local->init ? false : true; if (rdma->blockmap) { - g_hash_table_insert(rdma->blockmap, (void *) block_offset, block); + g_hash_table_insert(rdma->blockmap, (void *)(uintptr_t)block_offset, block); } trace_rdma_add_block(block_name, local->nb_blocks, diff --git a/migration/savevm.c b/migration/savevm.c index dbcc39a617..0ad1b93a8b 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -37,6 +37,7 @@ #include "qemu/timer.h" #include "audio/audio.h" #include "migration/migration.h" +#include "migration/postcopy-ram.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" #include "qemu/sockets.h" @@ -45,6 +46,7 @@ #include "exec/memory.h" #include "qmp-commands.h" #include "trace.h" +#include "qemu/bitops.h" #include "qemu/iov.h" #include "block/snapshot.h" #include "block/qapi.h" @@ -57,8 +59,26 @@ #define ARP_PTYPE_IP 0x0800 #define ARP_OP_REQUEST_REV 0x3 +const unsigned int postcopy_ram_discard_version = 0; + static bool skip_section_footers; +static struct mig_cmd_args { + ssize_t len; /* -1 = variable */ + const char *name; +} mig_cmd_args[] = { + [MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" }, + [MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" }, + [MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" }, + [MIG_CMD_POSTCOPY_ADVISE] = { .len = 16, .name = "POSTCOPY_ADVISE" }, + [MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" }, + [MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" }, + [MIG_CMD_POSTCOPY_RAM_DISCARD] = { + .len = -1, .name = "POSTCOPY_RAM_DISCARD" }, + [MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" }, + [MIG_CMD_MAX] = { .len = -1, .name = "MAX" }, +}; + static int announce_self_create(uint8_t *buf, uint8_t *mac_addr) { @@ -694,6 +714,156 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se) } } +/** + * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the + * command and associated data. + * + * @f: File to send command on + * @command: Command type to send + * @len: Length of associated data + * @data: Data associated with command. + */ +void qemu_savevm_command_send(QEMUFile *f, + enum qemu_vm_cmd command, + uint16_t len, + uint8_t *data) +{ + trace_savevm_command_send(command, len); + qemu_put_byte(f, QEMU_VM_COMMAND); + qemu_put_be16(f, (uint16_t)command); + qemu_put_be16(f, len); + qemu_put_buffer(f, data, len); + qemu_fflush(f); +} + +void qemu_savevm_send_ping(QEMUFile *f, uint32_t value) +{ + uint32_t buf; + + trace_savevm_send_ping(value); + buf = cpu_to_be32(value); + qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf); +} + +void qemu_savevm_send_open_return_path(QEMUFile *f) +{ + trace_savevm_send_open_return_path(); + qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL); +} + +/* We have a buffer of data to send; we don't want that all to be loaded + * by the command itself, so the command contains just the length of the + * extra buffer that we then send straight after it. + * TODO: Must be a better way to organise that + * + * Returns: + * 0 on success + * -ve on error + */ +int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb) +{ + size_t cur_iov; + size_t len = qsb_get_length(qsb); + uint32_t tmp; + + if (len > MAX_VM_CMD_PACKAGED_SIZE) { + error_report("%s: Unreasonably large packaged state: %zu", + __func__, len); + return -1; + } + + tmp = cpu_to_be32(len); + + trace_qemu_savevm_send_packaged(); + qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp); + + /* all the data follows (concatinating the iov's) */ + for (cur_iov = 0; cur_iov < qsb->n_iov; cur_iov++) { + /* The iov entries are partially filled */ + size_t towrite = MIN(qsb->iov[cur_iov].iov_len, len); + len -= towrite; + + if (!towrite) { + break; + } + + qemu_put_buffer(f, qsb->iov[cur_iov].iov_base, towrite); + } + + return 0; +} + +/* Send prior to any postcopy transfer */ +void qemu_savevm_send_postcopy_advise(QEMUFile *f) +{ + uint64_t tmp[2]; + tmp[0] = cpu_to_be64(getpagesize()); + tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits()); + + trace_qemu_savevm_send_postcopy_advise(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp); +} + +/* Sent prior to starting the destination running in postcopy, discard pages + * that have already been sent but redirtied on the source. + * CMD_POSTCOPY_RAM_DISCARD consist of: + * byte version (0) + * byte Length of name field (not including 0) + * n x byte RAM block name + * byte 0 terminator (just for safety) + * n x Byte ranges within the named RAMBlock + * be64 Start of the range + * be64 Length + * + * name: RAMBlock name that these entries are part of + * len: Number of page entries + * start_list: 'len' addresses + * length_list: 'len' addresses + * + */ +void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, + uint16_t len, + uint64_t *start_list, + uint64_t *length_list) +{ + uint8_t *buf; + uint16_t tmplen; + uint16_t t; + size_t name_len = strlen(name); + + trace_qemu_savevm_send_postcopy_ram_discard(name, len); + assert(name_len < 256); + buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len); + buf[0] = postcopy_ram_discard_version; + buf[1] = name_len; + memcpy(buf + 2, name, name_len); + tmplen = 2 + name_len; + buf[tmplen++] = '\0'; + + for (t = 0; t < len; t++) { + cpu_to_be64w((uint64_t *)(buf + tmplen), start_list[t]); + tmplen += 8; + cpu_to_be64w((uint64_t *)(buf + tmplen), length_list[t]); + tmplen += 8; + } + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf); + g_free(buf); +} + +/* Get the destination into a state where it can receive postcopy data. */ +void qemu_savevm_send_postcopy_listen(QEMUFile *f) +{ + trace_savevm_send_postcopy_listen(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL); +} + +/* Kick the destination into running */ +void qemu_savevm_send_postcopy_run(QEMUFile *f) +{ + trace_savevm_send_postcopy_run(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL); +} + bool qemu_savevm_state_blocked(Error **errp) { SaveStateEntry *se; @@ -713,6 +883,12 @@ void qemu_savevm_state_header(QEMUFile *f) trace_savevm_state_header(); qemu_put_be32(f, QEMU_VM_FILE_MAGIC); qemu_put_be32(f, QEMU_VM_FILE_VERSION); + + if (!savevm_state.skip_configuration) { + qemu_put_byte(f, QEMU_VM_CONFIGURATION); + vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0); + } + } void qemu_savevm_state_begin(QEMUFile *f, @@ -729,11 +905,6 @@ void qemu_savevm_state_begin(QEMUFile *f, se->ops->set_params(params, se->opaque); } - if (!savevm_state.skip_configuration) { - qemu_put_byte(f, QEMU_VM_CONFIGURATION); - vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0); - } - QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->save_live_setup) { continue; @@ -760,7 +931,7 @@ void qemu_savevm_state_begin(QEMUFile *f, * 0 : We haven't finished, caller have to go again * 1 : We have finished, we can go to complete phase */ -int qemu_savevm_state_iterate(QEMUFile *f) +int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) { SaveStateEntry *se; int ret = 1; @@ -775,6 +946,15 @@ int qemu_savevm_state_iterate(QEMUFile *f) continue; } } + /* + * In the postcopy phase, any device that doesn't know how to + * do postcopy should have saved it's state in the _complete + * call that's already run, it might get confused if we call + * iterate afterwards. + */ + if (postcopy && !se->ops->save_live_complete_postcopy) { + continue; + } if (qemu_file_rate_limit(f)) { return 0; } @@ -803,24 +983,69 @@ int qemu_savevm_state_iterate(QEMUFile *f) static bool should_send_vmdesc(void) { MachineState *machine = MACHINE(qdev_get_machine()); - return !machine->suppress_vmdesc; + bool in_postcopy = migration_in_postcopy(migrate_get_current()); + return !machine->suppress_vmdesc && !in_postcopy; +} + +/* + * Calls the save_live_complete_postcopy methods + * causing the last few pages to be sent immediately and doing any associated + * cleanup. + * Note postcopy also calls qemu_savevm_state_complete_precopy to complete + * all the other devices, but that happens at the point we switch to postcopy. + */ +void qemu_savevm_state_complete_postcopy(QEMUFile *f) +{ + SaveStateEntry *se; + int ret; + + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { + if (!se->ops || !se->ops->save_live_complete_postcopy) { + continue; + } + if (se->ops && se->ops->is_active) { + if (!se->ops->is_active(se->opaque)) { + continue; + } + } + trace_savevm_section_start(se->idstr, se->section_id); + /* Section type */ + qemu_put_byte(f, QEMU_VM_SECTION_END); + qemu_put_be32(f, se->section_id); + + ret = se->ops->save_live_complete_postcopy(f, se->opaque); + trace_savevm_section_end(se->idstr, se->section_id, ret); + save_section_footer(f, se); + if (ret < 0) { + qemu_file_set_error(f, ret); + return; + } + } + + qemu_put_byte(f, QEMU_VM_EOF); + qemu_fflush(f); } -void qemu_savevm_state_complete(QEMUFile *f) +void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) { QJSON *vmdesc; int vmdesc_len; SaveStateEntry *se; int ret; + bool in_postcopy = migration_in_postcopy(migrate_get_current()); - trace_savevm_state_complete(); + trace_savevm_state_complete_precopy(); cpu_synchronize_all_states(); QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { - if (!se->ops || !se->ops->save_live_complete) { + if (!se->ops || + (in_postcopy && se->ops->save_live_complete_postcopy) || + (in_postcopy && !iterable_only) || + !se->ops->save_live_complete_precopy) { continue; } + if (se->ops && se->ops->is_active) { if (!se->ops->is_active(se->opaque)) { continue; @@ -830,7 +1055,7 @@ void qemu_savevm_state_complete(QEMUFile *f) save_section_header(f, se, QEMU_VM_SECTION_END); - ret = se->ops->save_live_complete(f, se->opaque); + ret = se->ops->save_live_complete_precopy(f, se->opaque); trace_savevm_section_end(se->idstr, se->section_id, ret); save_section_footer(f, se); if (ret < 0) { @@ -839,6 +1064,10 @@ void qemu_savevm_state_complete(QEMUFile *f) } } + if (iterable_only) { + return; + } + vmdesc = qjson_new(); json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE); json_start_array(vmdesc, "devices"); @@ -867,7 +1096,10 @@ void qemu_savevm_state_complete(QEMUFile *f) save_section_footer(f, se); } - qemu_put_byte(f, QEMU_VM_EOF); + if (!in_postcopy) { + /* Postcopy stream will still be going */ + qemu_put_byte(f, QEMU_VM_EOF); + } json_end_array(vmdesc); qjson_finish(vmdesc); @@ -883,10 +1115,19 @@ void qemu_savevm_state_complete(QEMUFile *f) qemu_fflush(f); } -uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size) +/* Give an estimate of the amount left to be transferred, + * the result is split into the amount for units that can and + * for units that can't do postcopy. + */ +void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, + uint64_t *res_non_postcopiable, + uint64_t *res_postcopiable) { SaveStateEntry *se; - uint64_t ret = 0; + + *res_non_postcopiable = 0; + *res_postcopiable = 0; + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->save_live_pending) { @@ -897,19 +1138,19 @@ uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size) continue; } } - ret += se->ops->save_live_pending(f, se->opaque, max_size); + se->ops->save_live_pending(f, se->opaque, max_size, + res_non_postcopiable, res_postcopiable); } - return ret; } -void qemu_savevm_state_cancel(void) +void qemu_savevm_state_cleanup(void) { SaveStateEntry *se; - trace_savevm_state_cancel(); + trace_savevm_state_cleanup(); QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { - if (se->ops && se->ops->cancel) { - se->ops->cancel(se->opaque); + if (se->ops && se->ops->cleanup) { + se->ops->cleanup(se->opaque); } } } @@ -921,6 +1162,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) .blk = 0, .shared = 0 }; + MigrationState *ms = migrate_init(¶ms); + ms->file = f; if (qemu_savevm_state_blocked(errp)) { return -EINVAL; @@ -932,18 +1175,18 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) qemu_mutex_lock_iothread(); while (qemu_file_get_error(f) == 0) { - if (qemu_savevm_state_iterate(f) > 0) { + if (qemu_savevm_state_iterate(f, false) > 0) { break; } } ret = qemu_file_get_error(f); if (ret == 0) { - qemu_savevm_state_complete(f); + qemu_savevm_state_complete_precopy(f, false); ret = qemu_file_get_error(f); } + qemu_savevm_state_cleanup(); if (ret != 0) { - qemu_savevm_state_cancel(); error_setg_errno(errp, -ret, "Error while writing VM state"); } return ret; @@ -1001,6 +1244,420 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id) return NULL; } +enum LoadVMExitCodes { + /* Allow a command to quit all layers of nested loadvm loops */ + LOADVM_QUIT = 1, +}; + +static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); + +/* ------ incoming postcopy messages ------ */ +/* 'advise' arrives before any transfers just to tell us that a postcopy + * *might* happen - it might be skipped if precopy transferred everything + * quickly. + */ +static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) +{ + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); + uint64_t remote_hps, remote_tps; + + trace_loadvm_postcopy_handle_advise(); + if (ps != POSTCOPY_INCOMING_NONE) { + error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps); + return -1; + } + + if (!postcopy_ram_supported_by_host()) { + return -1; + } + + remote_hps = qemu_get_be64(mis->from_src_file); + if (remote_hps != getpagesize()) { + /* + * Some combinations of mismatch are probably possible but it gets + * a bit more complicated. In particular we need to place whole + * host pages on the dest at once, and we need to ensure that we + * handle dirtying to make sure we never end up sending part of + * a hostpage on it's own. + */ + error_report("Postcopy needs matching host page sizes (s=%d d=%d)", + (int)remote_hps, getpagesize()); + return -1; + } + + remote_tps = qemu_get_be64(mis->from_src_file); + if (remote_tps != (1ul << qemu_target_page_bits())) { + /* + * Again, some differences could be dealt with, but for now keep it + * simple. + */ + error_report("Postcopy needs matching target page sizes (s=%d d=%d)", + (int)remote_tps, 1 << qemu_target_page_bits()); + return -1; + } + + if (ram_postcopy_incoming_init(mis)) { + return -1; + } + + postcopy_state_set(POSTCOPY_INCOMING_ADVISE); + + return 0; +} + +/* After postcopy we will be told to throw some pages away since they're + * dirty and will have to be demand fetched. Must happen before CPU is + * started. + * There can be 0..many of these messages, each encoding multiple pages. + */ +static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis, + uint16_t len) +{ + int tmp; + char ramid[256]; + PostcopyState ps = postcopy_state_get(); + + trace_loadvm_postcopy_ram_handle_discard(); + + switch (ps) { + case POSTCOPY_INCOMING_ADVISE: + /* 1st discard */ + tmp = postcopy_ram_prepare_discard(mis); + if (tmp) { + return tmp; + } + break; + + case POSTCOPY_INCOMING_DISCARD: + /* Expected state */ + break; + + default: + error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)", + ps); + return -1; + } + /* We're expecting a + * Version (0) + * a RAM ID string (length byte, name, 0 term) + * then at least 1 16 byte chunk + */ + if (len < (1 + 1 + 1 + 1 + 2 * 8)) { + error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len); + return -1; + } + + tmp = qemu_get_byte(mis->from_src_file); + if (tmp != postcopy_ram_discard_version) { + error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp); + return -1; + } + + if (!qemu_get_counted_string(mis->from_src_file, ramid)) { + error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID"); + return -1; + } + tmp = qemu_get_byte(mis->from_src_file); + if (tmp != 0) { + error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp); + return -1; + } + + len -= 3 + strlen(ramid); + if (len % 16) { + error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len); + return -1; + } + trace_loadvm_postcopy_ram_handle_discard_header(ramid, len); + while (len) { + uint64_t start_addr, block_length; + start_addr = qemu_get_be64(mis->from_src_file); + block_length = qemu_get_be64(mis->from_src_file); + + len -= 16; + int ret = ram_discard_range(mis, ramid, start_addr, + block_length); + if (ret) { + return ret; + } + } + trace_loadvm_postcopy_ram_handle_discard_end(); + + return 0; +} + +/* + * Triggered by a postcopy_listen command; this thread takes over reading + * the input stream, leaving the main thread free to carry on loading the rest + * of the device state (from RAM). + * (TODO:This could do with being in a postcopy file - but there again it's + * just another input loop, not that postcopy specific) + */ +static void *postcopy_ram_listen_thread(void *opaque) +{ + QEMUFile *f = opaque; + MigrationIncomingState *mis = migration_incoming_get_current(); + int load_res; + + qemu_sem_post(&mis->listen_thread_sem); + trace_postcopy_ram_listen_thread_start(); + + /* + * Because we're a thread and not a coroutine we can't yield + * in qemu_file, and thus we must be blocking now. + */ + qemu_file_set_blocking(f, true); + load_res = qemu_loadvm_state_main(f, mis); + /* And non-blocking again so we don't block in any cleanup */ + qemu_file_set_blocking(f, false); + + trace_postcopy_ram_listen_thread_exit(); + if (load_res < 0) { + error_report("%s: loadvm failed: %d", __func__, load_res); + qemu_file_set_error(f, load_res); + } else { + /* + * This looks good, but it's possible that the device loading in the + * main thread hasn't finished yet, and so we might not be in 'RUN' + * state yet; wait for the end of the main thread. + */ + qemu_event_wait(&mis->main_thread_load_event); + } + postcopy_ram_incoming_cleanup(mis); + /* + * If everything has worked fine, then the main thread has waited + * for us to start, and we're the last use of the mis. + * (If something broke then qemu will have to exit anyway since it's + * got a bad migration state). + */ + migration_incoming_state_destroy(); + + if (load_res < 0) { + /* + * If something went wrong then we have a bad state so exit; + * depending how far we got it might be possible at this point + * to leave the guest running and fire MCEs for pages that never + * arrived as a desperate recovery step. + */ + exit(EXIT_FAILURE); + } + + return NULL; +} + +/* After this message we must be able to immediately receive postcopy data */ +static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) +{ + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING); + trace_loadvm_postcopy_handle_listen(); + if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) { + error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps); + return -1; + } + if (ps == POSTCOPY_INCOMING_ADVISE) { + /* + * A rare case, we entered listen without having to do any discards, + * so do the setup that's normally done at the time of the 1st discard. + */ + postcopy_ram_prepare_discard(mis); + } + + /* + * Sensitise RAM - can now generate requests for blocks that don't exist + * However, at this point the CPU shouldn't be running, and the IO + * shouldn't be doing anything yet so don't actually expect requests + */ + if (postcopy_ram_enable_notify(mis)) { + return -1; + } + + if (mis->have_listen_thread) { + error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread"); + return -1; + } + + mis->have_listen_thread = true; + /* Start up the listening thread and wait for it to signal ready */ + qemu_sem_init(&mis->listen_thread_sem, 0); + qemu_thread_create(&mis->listen_thread, "postcopy/listen", + postcopy_ram_listen_thread, mis->from_src_file, + QEMU_THREAD_JOINABLE); + qemu_sem_wait(&mis->listen_thread_sem); + qemu_sem_destroy(&mis->listen_thread_sem); + + return 0; +} + +/* After all discards we can start running and asking for pages */ +static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) +{ + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); + Error *local_err = NULL; + + trace_loadvm_postcopy_handle_run(); + if (ps != POSTCOPY_INCOMING_LISTENING) { + error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps); + return -1; + } + + /* TODO we should move all of this lot into postcopy_ram.c or a shared code + * in migration.c + */ + cpu_synchronize_all_post_init(); + + qemu_announce_self(); + + /* Make sure all file formats flush their mutable metadata */ + bdrv_invalidate_cache_all(&local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + + trace_loadvm_postcopy_handle_run_cpu_sync(); + cpu_synchronize_all_post_init(); + + trace_loadvm_postcopy_handle_run_vmstart(); + + if (autostart) { + /* Hold onto your hats, starting the CPU */ + vm_start(); + } else { + /* leave it paused and let management decide when to start the CPU */ + runstate_set(RUN_STATE_PAUSED); + } + + /* We need to finish reading the stream from the package + * and also stop reading anything more from the stream that loaded the + * package (since it's now being read by the listener thread). + * LOADVM_QUIT will quit all the layers of nested loadvm loops. + */ + return LOADVM_QUIT; +} + +/** + * Immediately following this command is a blob of data containing an embedded + * chunk of migration stream; read it and load it. + * + * @mis: Incoming state + * @length: Length of packaged data to read + * + * Returns: Negative values on error + * + */ +static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis) +{ + int ret; + uint8_t *buffer; + uint32_t length; + QEMUSizedBuffer *qsb; + + length = qemu_get_be32(mis->from_src_file); + trace_loadvm_handle_cmd_packaged(length); + + if (length > MAX_VM_CMD_PACKAGED_SIZE) { + error_report("Unreasonably large packaged state: %u", length); + return -1; + } + buffer = g_malloc0(length); + ret = qemu_get_buffer(mis->from_src_file, buffer, (int)length); + if (ret != length) { + g_free(buffer); + error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%d\n", + ret, length); + return (ret < 0) ? ret : -EAGAIN; + } + trace_loadvm_handle_cmd_packaged_received(ret); + + /* Setup a dummy QEMUFile that actually reads from the buffer */ + qsb = qsb_create(buffer, length); + g_free(buffer); /* Because qsb_create copies */ + if (!qsb) { + error_report("Unable to create qsb"); + } + QEMUFile *packf = qemu_bufopen("r", qsb); + + ret = qemu_loadvm_state_main(packf, mis); + trace_loadvm_handle_cmd_packaged_main(ret); + qemu_fclose(packf); + qsb_free(qsb); + + return ret; +} + +/* + * Process an incoming 'QEMU_VM_COMMAND' + * 0 just a normal return + * LOADVM_QUIT All good, but exit the loop + * <0 Error + */ +static int loadvm_process_command(QEMUFile *f) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + uint16_t cmd; + uint16_t len; + uint32_t tmp32; + + cmd = qemu_get_be16(f); + len = qemu_get_be16(f); + + trace_loadvm_process_command(cmd, len); + if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) { + error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len); + return -EINVAL; + } + + if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) { + error_report("%s received with bad length - expecting %zu, got %d", + mig_cmd_args[cmd].name, + (size_t)mig_cmd_args[cmd].len, len); + return -ERANGE; + } + + switch (cmd) { + case MIG_CMD_OPEN_RETURN_PATH: + if (mis->to_src_file) { + error_report("CMD_OPEN_RETURN_PATH called when RP already open"); + /* Not really a problem, so don't give up */ + return 0; + } + mis->to_src_file = qemu_file_get_return_path(f); + if (!mis->to_src_file) { + error_report("CMD_OPEN_RETURN_PATH failed"); + return -1; + } + break; + + case MIG_CMD_PING: + tmp32 = qemu_get_be32(f); + trace_loadvm_process_command_ping(tmp32); + if (!mis->to_src_file) { + error_report("CMD_PING (0x%x) received with no return path", + tmp32); + return -1; + } + migrate_send_rp_pong(mis, tmp32); + break; + + case MIG_CMD_PACKAGED: + return loadvm_handle_cmd_packaged(mis); + + case MIG_CMD_POSTCOPY_ADVISE: + return loadvm_postcopy_handle_advise(mis); + + case MIG_CMD_POSTCOPY_LISTEN: + return loadvm_postcopy_handle_listen(mis); + + case MIG_CMD_POSTCOPY_RUN: + return loadvm_postcopy_handle_run(mis); + + case MIG_CMD_POSTCOPY_RAM_DISCARD: + return loadvm_postcopy_ram_handle_discard(mis, len); + } + + return 0; +} + struct LoadStateEntry { QLIST_ENTRY(LoadStateEntry) entry; SaveStateEntry *se; @@ -1053,47 +1710,10 @@ void loadvm_free_handlers(MigrationIncomingState *mis) } } -int qemu_loadvm_state(QEMUFile *f) +static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) { - MigrationIncomingState *mis = migration_incoming_get_current(); - Error *local_err = NULL; uint8_t section_type; - unsigned int v; int ret; - int file_error_after_eof = -1; - - if (qemu_savevm_state_blocked(&local_err)) { - error_report_err(local_err); - return -EINVAL; - } - - v = qemu_get_be32(f); - if (v != QEMU_VM_FILE_MAGIC) { - error_report("Not a migration stream"); - return -EINVAL; - } - - v = qemu_get_be32(f); - if (v == QEMU_VM_FILE_VERSION_COMPAT) { - error_report("SaveVM v2 format is obsolete and don't work anymore"); - return -ENOTSUP; - } - if (v != QEMU_VM_FILE_VERSION) { - error_report("Unsupported migration stream version"); - return -ENOTSUP; - } - - if (!savevm_state.skip_configuration) { - if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { - error_report("Configuration section missing"); - return -EINVAL; - } - ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); - - if (ret) { - return ret; - } - } while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) { uint32_t instance_id, version_id, section_id; @@ -1122,16 +1742,14 @@ int qemu_loadvm_state(QEMUFile *f) if (se == NULL) { error_report("Unknown savevm section or instance '%s' %d", idstr, instance_id); - ret = -EINVAL; - goto out; + return -EINVAL; } /* Validate version */ if (version_id > se->version_id) { error_report("savevm: unsupported version %d for '%s' v%d", version_id, idstr, se->version_id); - ret = -EINVAL; - goto out; + return -EINVAL; } /* Add entry */ @@ -1146,11 +1764,10 @@ int qemu_loadvm_state(QEMUFile *f) if (ret < 0) { error_report("error while loading state for instance 0x%x of" " device '%s'", instance_id, idstr); - goto out; + return ret; } if (!check_section_footer(f, le)) { - ret = -EINVAL; - goto out; + return -EINVAL; } break; case QEMU_VM_SECTION_PART: @@ -1165,29 +1782,88 @@ int qemu_loadvm_state(QEMUFile *f) } if (le == NULL) { error_report("Unknown savevm section %d", section_id); - ret = -EINVAL; - goto out; + return -EINVAL; } ret = vmstate_load(f, le->se, le->version_id); if (ret < 0) { error_report("error while loading state section id %d(%s)", section_id, le->se->idstr); - goto out; + return ret; } if (!check_section_footer(f, le)) { - ret = -EINVAL; - goto out; + return -EINVAL; + } + break; + case QEMU_VM_COMMAND: + ret = loadvm_process_command(f); + trace_qemu_loadvm_state_section_command(ret); + if ((ret < 0) || (ret & LOADVM_QUIT)) { + return ret; } break; default: error_report("Unknown savevm section type %d", section_type); - ret = -EINVAL; - goto out; + return -EINVAL; + } + } + + return 0; +} + +int qemu_loadvm_state(QEMUFile *f) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; + unsigned int v; + int ret; + + if (qemu_savevm_state_blocked(&local_err)) { + error_report_err(local_err); + return -EINVAL; + } + + v = qemu_get_be32(f); + if (v != QEMU_VM_FILE_MAGIC) { + error_report("Not a migration stream"); + return -EINVAL; + } + + v = qemu_get_be32(f); + if (v == QEMU_VM_FILE_VERSION_COMPAT) { + error_report("SaveVM v2 format is obsolete and don't work anymore"); + return -ENOTSUP; + } + if (v != QEMU_VM_FILE_VERSION) { + error_report("Unsupported migration stream version"); + return -ENOTSUP; + } + + if (!savevm_state.skip_configuration) { + if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { + error_report("Configuration section missing"); + return -EINVAL; + } + ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); + + if (ret) { + return ret; } } - file_error_after_eof = qemu_file_get_error(f); + ret = qemu_loadvm_state_main(f, mis); + qemu_event_set(&mis->main_thread_load_event); + + trace_qemu_loadvm_state_post_main(ret); + + if (mis->have_listen_thread) { + /* Listen thread still going, can't clean up yet */ + return ret; + } + + if (ret == 0) { + ret = qemu_file_get_error(f); + } /* * Try to read in the VMDESC section as well, so that dumping tools that @@ -1199,10 +1875,10 @@ int qemu_loadvm_state(QEMUFile *f) * We also mustn't read data that isn't there; some transports (RDMA) * will stall waiting for that data when the source has already closed. */ - if (should_send_vmdesc()) { + if (ret == 0 && should_send_vmdesc()) { uint8_t *buf; uint32_t size; - section_type = qemu_get_byte(f); + uint8_t section_type = qemu_get_byte(f); if (section_type != QEMU_VM_VMDESCRIPTION) { error_report("Expected vmdescription section, but got %d", @@ -1226,57 +1902,9 @@ int qemu_loadvm_state(QEMUFile *f) cpu_synchronize_all_post_init(); - ret = 0; - -out: - if (ret == 0) { - /* We may not have a VMDESC section, so ignore relative errors */ - ret = file_error_after_eof; - } - return ret; } -static BlockDriverState *find_vmstate_bs(void) -{ - BlockDriverState *bs = NULL; - while ((bs = bdrv_next(bs))) { - if (bdrv_can_snapshot(bs)) { - return bs; - } - } - return NULL; -} - -/* - * Deletes snapshots of a given name in all opened images. - */ -static int del_existing_snapshots(Monitor *mon, const char *name) -{ - BlockDriverState *bs; - QEMUSnapshotInfo sn1, *snapshot = &sn1; - Error *err = NULL; - - bs = NULL; - while ((bs = bdrv_next(bs))) { - if (bdrv_can_snapshot(bs) && - bdrv_snapshot_find(bs, snapshot, name) >= 0) { - bdrv_snapshot_delete_by_id_or_name(bs, name, &err); - if (err) { - monitor_printf(mon, - "Error while deleting snapshot on device '%s':" - " %s\n", - bdrv_get_device_name(bs), - error_get_pretty(err)); - error_free(err); - return -1; - } - } - } - - return 0; -} - void hmp_savevm(Monitor *mon, const QDict *qdict) { BlockDriverState *bs, *bs1; @@ -1289,27 +1917,29 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) struct tm tm; const char *name = qdict_get_try_str(qdict, "name"); Error *local_err = NULL; + AioContext *aio_context; - /* Verify if there is a device that doesn't support snapshots and is writable */ - bs = NULL; - while ((bs = bdrv_next(bs))) { - - if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { - continue; - } + if (!bdrv_all_can_snapshot(&bs)) { + monitor_printf(mon, "Device '%s' is writable but does not " + "support snapshots.\n", bdrv_get_device_name(bs)); + return; + } - if (!bdrv_can_snapshot(bs)) { - monitor_printf(mon, "Device '%s' is writable but does not support snapshots.\n", - bdrv_get_device_name(bs)); - return; - } + /* Delete old snapshots of the same name */ + if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) { + monitor_printf(mon, + "Error while deleting snapshot on device '%s': %s\n", + bdrv_get_device_name(bs1), error_get_pretty(local_err)); + error_free(local_err); + return; } - bs = find_vmstate_bs(); - if (!bs) { + bs = bdrv_all_find_vmstate_bs(); + if (bs == NULL) { monitor_printf(mon, "No block device can accept snapshots\n"); return; } + aio_context = bdrv_get_aio_context(bs); saved_vm_running = runstate_is_running(); @@ -1320,6 +1950,8 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) } vm_stop(RUN_STATE_SAVE_VM); + aio_context_acquire(aio_context); + memset(sn, 0, sizeof(*sn)); /* fill auxiliary fields */ @@ -1342,11 +1974,6 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm); } - /* Delete old snapshots of the same name */ - if (name && del_existing_snapshots(mon, name) < 0) { - goto the_end; - } - /* save the VM state */ f = qemu_fopen_bdrv(bs, 1); if (!f) { @@ -1362,22 +1989,14 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) goto the_end; } - /* create the snapshots */ - - bs1 = NULL; - while ((bs1 = bdrv_next(bs1))) { - if (bdrv_can_snapshot(bs1)) { - /* Write VM state size only to the image that contains the state */ - sn->vm_state_size = (bs == bs1 ? vm_state_size : 0); - ret = bdrv_snapshot_create(bs1, sn); - if (ret < 0) { - monitor_printf(mon, "Error while creating snapshot on '%s'\n", - bdrv_get_device_name(bs1)); - } - } + ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs); + if (ret < 0) { + monitor_printf(mon, "Error while creating snapshot on '%s'\n", + bdrv_get_device_name(bs)); } the_end: + aio_context_release(aio_context); if (saved_vm_running) { vm_start(); } @@ -1416,15 +2035,31 @@ int load_vmstate(const char *name) QEMUSnapshotInfo sn; QEMUFile *f; int ret; + AioContext *aio_context; - bs_vm_state = find_vmstate_bs(); + if (!bdrv_all_can_snapshot(&bs)) { + error_report("Device '%s' is writable but does not support snapshots.", + bdrv_get_device_name(bs)); + return -ENOTSUP; + } + ret = bdrv_all_find_snapshot(name, &bs); + if (ret < 0) { + error_report("Device '%s' does not have the requested snapshot '%s'", + bdrv_get_device_name(bs), name); + return ret; + } + + bs_vm_state = bdrv_all_find_vmstate_bs(); if (!bs_vm_state) { error_report("No block device supports snapshots"); return -ENOTSUP; } + aio_context = bdrv_get_aio_context(bs_vm_state); /* Don't even try to load empty VM states */ + aio_context_acquire(aio_context); ret = bdrv_snapshot_find(bs_vm_state, &sn, name); + aio_context_release(aio_context); if (ret < 0) { return ret; } else if (sn.vm_state_size == 0) { @@ -1433,42 +2068,14 @@ int load_vmstate(const char *name) return -EINVAL; } - /* Verify if there is any device that doesn't support snapshots and is - writable and check if the requested snapshot is available too. */ - bs = NULL; - while ((bs = bdrv_next(bs))) { - - if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { - continue; - } - - if (!bdrv_can_snapshot(bs)) { - error_report("Device '%s' is writable but does not support snapshots.", - bdrv_get_device_name(bs)); - return -ENOTSUP; - } - - ret = bdrv_snapshot_find(bs, &sn, name); - if (ret < 0) { - error_report("Device '%s' does not have the requested snapshot '%s'", - bdrv_get_device_name(bs), name); - return ret; - } - } - /* Flush all IO requests so they don't interfere with the new state. */ bdrv_drain_all(); - bs = NULL; - while ((bs = bdrv_next(bs))) { - if (bdrv_can_snapshot(bs)) { - ret = bdrv_snapshot_goto(bs, name); - if (ret < 0) { - error_report("Error %d while activating snapshot '%s' on '%s'", - ret, name, bdrv_get_device_name(bs)); - return ret; - } - } + ret = bdrv_all_goto_snapshot(name, &bs); + if (ret < 0) { + error_report("Error %d while activating snapshot '%s' on '%s'", + ret, name, bdrv_get_device_name(bs)); + return ret; } /* restore the VM state */ @@ -1480,9 +2087,12 @@ int load_vmstate(const char *name) qemu_system_reset(VMRESET_SILENT); migration_incoming_state_new(f); - ret = qemu_loadvm_state(f); + aio_context_acquire(aio_context); + ret = qemu_loadvm_state(f); qemu_fclose(f); + aio_context_release(aio_context); + migration_incoming_state_destroy(); if (ret < 0) { error_report("Error %d while loading VM state", ret); @@ -1498,43 +2108,34 @@ void hmp_delvm(Monitor *mon, const QDict *qdict) Error *err; const char *name = qdict_get_str(qdict, "name"); - if (!find_vmstate_bs()) { - monitor_printf(mon, "No block device supports snapshots\n"); - return; - } - - bs = NULL; - while ((bs = bdrv_next(bs))) { - if (bdrv_can_snapshot(bs)) { - err = NULL; - bdrv_snapshot_delete_by_id_or_name(bs, name, &err); - if (err) { - monitor_printf(mon, - "Error while deleting snapshot on device '%s':" - " %s\n", - bdrv_get_device_name(bs), - error_get_pretty(err)); - error_free(err); - } - } + if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) { + monitor_printf(mon, + "Error while deleting snapshot on device '%s': %s\n", + bdrv_get_device_name(bs), error_get_pretty(err)); + error_free(err); } } void hmp_info_snapshots(Monitor *mon, const QDict *qdict) { BlockDriverState *bs, *bs1; - QEMUSnapshotInfo *sn_tab, *sn, s, *sn_info = &s; - int nb_sns, i, ret, available; + QEMUSnapshotInfo *sn_tab, *sn; + int nb_sns, i; int total; int *available_snapshots; + AioContext *aio_context; - bs = find_vmstate_bs(); + bs = bdrv_all_find_vmstate_bs(); if (!bs) { monitor_printf(mon, "No available block device supports snapshots\n"); return; } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); nb_sns = bdrv_snapshot_list(bs, &sn_tab); + aio_context_release(aio_context); + if (nb_sns < 0) { monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); return; @@ -1548,21 +2149,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) available_snapshots = g_new0(int, nb_sns); total = 0; for (i = 0; i < nb_sns; i++) { - sn = &sn_tab[i]; - available = 1; - bs1 = NULL; - - while ((bs1 = bdrv_next(bs1))) { - if (bdrv_can_snapshot(bs1) && bs1 != bs) { - ret = bdrv_snapshot_find(bs1, sn_info, sn->id_str); - if (ret < 0) { - available = 0; - break; - } - } - } - - if (available) { + if (bdrv_all_find_snapshot(sn_tab[i].id_str, &bs1) == 0) { available_snapshots[total] = i; total++; } @@ -181,13 +181,16 @@ typedef struct { * instance. */ typedef struct MonitorQAPIEventState { - QAPIEvent event; /* Event being tracked */ - int64_t rate; /* Minimum time (in ns) between two events */ - int64_t last; /* QEMU_CLOCK_REALTIME value at last emission */ + QAPIEvent event; /* Throttling state for this event type and... */ + QDict *data; /* ... data, see qapi_event_throttle_equal() */ QEMUTimer *timer; /* Timer for handling delayed events */ - QObject *data; /* Event pending delayed dispatch */ + QDict *qdict; /* Delayed event (if any) */ } MonitorQAPIEventState; +typedef struct { + int64_t rate; /* Minimum time (in ns) between two events */ +} MonitorQAPIEventConf; + struct Monitor { CharDriverState *chr; int reset_seen; @@ -438,132 +441,161 @@ static void monitor_protocol_emitter(Monitor *mon, QObject *data, } -static MonitorQAPIEventState monitor_qapi_event_state[QAPI_EVENT_MAX]; +static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT_MAX] = { + /* Limit guest-triggerable events to 1 per second */ + [QAPI_EVENT_RTC_CHANGE] = { 1000 * SCALE_MS }, + [QAPI_EVENT_WATCHDOG] = { 1000 * SCALE_MS }, + [QAPI_EVENT_BALLOON_CHANGE] = { 1000 * SCALE_MS }, + [QAPI_EVENT_QUORUM_REPORT_BAD] = { 1000 * SCALE_MS }, + [QAPI_EVENT_QUORUM_FAILURE] = { 1000 * SCALE_MS }, + [QAPI_EVENT_VSERPORT_CHANGE] = { 1000 * SCALE_MS }, +}; + +GHashTable *monitor_qapi_event_state; /* * Emits the event to every monitor instance, @event is only used for trace * Called with monitor_lock held. */ -static void monitor_qapi_event_emit(QAPIEvent event, QObject *data) +static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) { Monitor *mon; - trace_monitor_protocol_event_emit(event, data); + trace_monitor_protocol_event_emit(event, qdict); QLIST_FOREACH(mon, &mon_list, entry) { if (monitor_is_qmp(mon) && mon->qmp.in_command_mode) { - monitor_json_emitter(mon, data); + monitor_json_emitter(mon, QOBJECT(qdict)); } } } +static void monitor_qapi_event_handler(void *opaque); + /* * Queue a new event for emission to Monitor instances, * applying any rate limiting if required. */ static void -monitor_qapi_event_queue(QAPIEvent event, QDict *data, Error **errp) +monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp) { + MonitorQAPIEventConf *evconf; MonitorQAPIEventState *evstate; - assert(event < QAPI_EVENT_MAX); - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - evstate = &(monitor_qapi_event_state[event]); - trace_monitor_protocol_event_queue(event, - data, - evstate->rate, - evstate->last, - now); + assert(event < QAPI_EVENT_MAX); + evconf = &monitor_qapi_event_conf[event]; + trace_monitor_protocol_event_queue(event, qdict, evconf->rate); - /* Rate limit of 0 indicates no throttling */ qemu_mutex_lock(&monitor_lock); - if (!evstate->rate) { - monitor_qapi_event_emit(event, QOBJECT(data)); - evstate->last = now; + + if (!evconf->rate) { + /* Unthrottled event */ + monitor_qapi_event_emit(event, qdict); } else { - int64_t delta = now - evstate->last; - if (evstate->data || - delta < evstate->rate) { - /* If there's an existing event pending, replace - * it with the new event, otherwise schedule a - * timer for delayed emission + QDict *data = qobject_to_qdict(qdict_get(qdict, "data")); + MonitorQAPIEventState key = { .event = event, .data = data }; + + evstate = g_hash_table_lookup(monitor_qapi_event_state, &key); + assert(!evstate || timer_pending(evstate->timer)); + + if (evstate) { + /* + * Timer is pending for (at least) evconf->rate ns after + * last send. Store event for sending when timer fires, + * replacing a prior stored event if any. */ - if (evstate->data) { - qobject_decref(evstate->data); - } else { - int64_t then = evstate->last + evstate->rate; - timer_mod_ns(evstate->timer, then); - } - evstate->data = QOBJECT(data); - qobject_incref(evstate->data); + QDECREF(evstate->qdict); + evstate->qdict = qdict; + QINCREF(evstate->qdict); } else { - monitor_qapi_event_emit(event, QOBJECT(data)); - evstate->last = now; + /* + * Last send was (at least) evconf->rate ns ago. + * Send immediately, and arm the timer to call + * monitor_qapi_event_handler() in evconf->rate ns. Any + * events arriving before then will be delayed until then. + */ + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + monitor_qapi_event_emit(event, qdict); + + evstate = g_new(MonitorQAPIEventState, 1); + evstate->event = event; + evstate->data = data; + QINCREF(evstate->data); + evstate->qdict = NULL; + evstate->timer = timer_new_ns(QEMU_CLOCK_REALTIME, + monitor_qapi_event_handler, + evstate); + g_hash_table_add(monitor_qapi_event_state, evstate); + timer_mod_ns(evstate->timer, now + evconf->rate); } } + qemu_mutex_unlock(&monitor_lock); } /* - * The callback invoked by QemuTimer when a delayed - * event is ready to be emitted + * This function runs evconf->rate ns after sending a throttled + * event. + * If another event has since been stored, send it. */ static void monitor_qapi_event_handler(void *opaque) { MonitorQAPIEventState *evstate = opaque; - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + MonitorQAPIEventConf *evconf = &monitor_qapi_event_conf[evstate->event]; - trace_monitor_protocol_event_handler(evstate->event, - evstate->data, - evstate->last, - now); + trace_monitor_protocol_event_handler(evstate->event, evstate->qdict); qemu_mutex_lock(&monitor_lock); - if (evstate->data) { - monitor_qapi_event_emit(evstate->event, evstate->data); - qobject_decref(evstate->data); - evstate->data = NULL; + + if (evstate->qdict) { + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + monitor_qapi_event_emit(evstate->event, evstate->qdict); + QDECREF(evstate->qdict); + evstate->qdict = NULL; + timer_mod_ns(evstate->timer, now + evconf->rate); + } else { + g_hash_table_remove(monitor_qapi_event_state, evstate); + QDECREF(evstate->data); + timer_free(evstate->timer); + g_free(evstate); } - evstate->last = now; + qemu_mutex_unlock(&monitor_lock); } -/* - * @event: the event ID to be limited - * @rate: the rate limit in milliseconds - * - * Sets a rate limit on a particular event, so no - * more than 1 event will be emitted within @rate - * milliseconds - */ -static void -monitor_qapi_event_throttle(QAPIEvent event, int64_t rate) +static unsigned int qapi_event_throttle_hash(const void *key) { - MonitorQAPIEventState *evstate; - assert(event < QAPI_EVENT_MAX); + const MonitorQAPIEventState *evstate = key; + unsigned int hash = evstate->event * 255; - evstate = &(monitor_qapi_event_state[event]); + if (evstate->event == QAPI_EVENT_VSERPORT_CHANGE) { + hash += g_str_hash(qdict_get_str(evstate->data, "id")); + } - trace_monitor_protocol_event_throttle(event, rate); - evstate->event = event; - assert(rate * SCALE_MS <= INT64_MAX); - evstate->rate = rate * SCALE_MS; - evstate->last = 0; - evstate->data = NULL; - evstate->timer = timer_new(QEMU_CLOCK_REALTIME, - SCALE_MS, - monitor_qapi_event_handler, - evstate); + return hash; } -static void monitor_qapi_event_init(void) +static gboolean qapi_event_throttle_equal(const void *a, const void *b) { - /* Limit guest-triggerable events to 1 per second */ - monitor_qapi_event_throttle(QAPI_EVENT_RTC_CHANGE, 1000); - monitor_qapi_event_throttle(QAPI_EVENT_WATCHDOG, 1000); - monitor_qapi_event_throttle(QAPI_EVENT_BALLOON_CHANGE, 1000); - monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_REPORT_BAD, 1000); - monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_FAILURE, 1000); - monitor_qapi_event_throttle(QAPI_EVENT_VSERPORT_CHANGE, 1000); + const MonitorQAPIEventState *eva = a; + const MonitorQAPIEventState *evb = b; + + if (eva->event != evb->event) { + return FALSE; + } + + if (eva->event == QAPI_EVENT_VSERPORT_CHANGE) { + return !strcmp(qdict_get_str(eva->data, "id"), + qdict_get_str(evb->data, "id")); + } + return TRUE; +} + +static void monitor_qapi_event_init(void) +{ + monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash, + qapi_event_throttle_equal); qmp_event_set_func_emit(monitor_qapi_event_queue); } @@ -2104,6 +2136,8 @@ static int get_monitor_def(target_long *pval, const char *name) { const MonitorDef *md = target_monitor_defs(); void *ptr; + uint64_t tmp = 0; + int ret; if (md == NULL) { return -1; @@ -2131,7 +2165,13 @@ static int get_monitor_def(target_long *pval, const char *name) return 0; } } - return -1; + + ret = target_get_monitor_def(mon_get_cpu(), name, &tmp); + if (!ret) { + *pval = (target_long) tmp; + } + + return ret; } static void next(void) @@ -3376,13 +3416,18 @@ static void vm_completion(ReadLineState *rs, const char *str) readline_set_completion_index(rs, len); while ((bs = bdrv_next(bs))) { SnapshotInfoList *snapshots, *snapshot; + AioContext *ctx = bdrv_get_aio_context(bs); + bool ok = false; - if (!bdrv_can_snapshot(bs)) { - continue; + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + ok = bdrv_query_snapshot_info_list(bs, &snapshots, NULL) == 0; } - if (bdrv_query_snapshot_info_list(bs, &snapshots, NULL)) { + aio_context_release(ctx); + if (!ok) { continue; } + snapshot = snapshots; while (snapshot) { char *completion = snapshot->value->name; @@ -3804,7 +3849,7 @@ static QDict *qmp_check_input_obj(QObject *input_obj, Error **errp) return input_dict; } -static void handle_qmp_command(JSONMessageParser *parser, QList *tokens) +static void handle_qmp_command(JSONMessageParser *parser, GQueue *tokens) { Error *local_err = NULL; QObject *obj, *data; @@ -3862,6 +3907,7 @@ static void handle_qmp_command(JSONMessageParser *parser, QList *tokens) err_out: monitor_protocol_emitter(mon, data, local_err); qobject_decref(data); + error_free(local_err); QDECREF(input); QDECREF(args); } diff --git a/net/dump.c b/net/dump.c index dd0555f8bd..ce16a4b0e3 100644 --- a/net/dump.c +++ b/net/dump.c @@ -187,8 +187,8 @@ int net_init_dump(const NetClientOptions *opts, const char *name, NetClientState *nc; DumpNetClient *dnc; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP); - dump = opts->dump; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_DUMP); + dump = opts->u.dump; assert(peer); diff --git a/net/filter.c b/net/filter.c index 326f2b5ac8..1365bad026 100644 --- a/net/filter.c +++ b/net/filter.c @@ -137,6 +137,7 @@ static void netfilter_complete(UserCreatable *uc, Error **errp) Error *local_err = NULL; char *str, *info; ObjectProperty *prop; + ObjectPropertyIterator *iter; StringOutputVisitor *ov; if (!nf->netdev_id) { @@ -173,7 +174,8 @@ static void netfilter_complete(UserCreatable *uc, Error **errp) QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next); /* generate info str */ - QTAILQ_FOREACH(prop, &OBJECT(nf)->properties, node) { + iter = object_property_iter_init(OBJECT(nf)); + while ((prop = object_property_iter_next(iter))) { if (!strcmp(prop->name, "type")) { continue; } @@ -187,6 +189,7 @@ static void netfilter_complete(UserCreatable *uc, Error **errp) g_free(str); g_free(info); } + object_property_iter_free(iter); } static void netfilter_finalize(Object *obj) @@ -285,9 +285,9 @@ int net_init_hubport(const NetClientOptions *opts, const char *name, { const NetdevHubPortOptions *hubport; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT); + assert(opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT); assert(!peer); - hubport = opts->hubport; + hubport = opts->u.hubport; net_hub_add_port(hubport->hubid, name); return 0; diff --git a/net/l2tpv3.c b/net/l2tpv3.c index 4f9bceecc9..8e68e540ec 100644 --- a/net/l2tpv3.c +++ b/net/l2tpv3.c @@ -545,8 +545,8 @@ int net_init_l2tpv3(const NetClientOptions *opts, s->queue_tail = 0; s->header_mismatch = false; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3); - l2tpv3 = opts->l2tpv3; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_L2TPV3); + l2tpv3 = opts->u.l2tpv3; if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { s->ipv6 = l2tpv3->ipv6; @@ -882,8 +882,8 @@ static int net_init_nic(const NetClientOptions *opts, const char *name, NICInfo *nd; const NetLegacyNicOptions *nic; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_NIC); - nic = opts->nic; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_NIC); + nic = opts->u.nic; idx = nic_get_free_idx(); if (idx == -1 || nb_nics >= MAX_NICS) { @@ -984,9 +984,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) opts = netdev->opts; name = netdev->id; - if (opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP || - opts->kind == NET_CLIENT_OPTIONS_KIND_NIC || - !net_client_init_fun[opts->kind]) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_DUMP || + opts->type == NET_CLIENT_OPTIONS_KIND_NIC || + !net_client_init_fun[opts->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a netdev backend type"); return -1; @@ -997,16 +997,16 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) /* missing optional values have been initialized to "all bits zero" */ name = net->has_id ? net->id : net->name; - if (opts->kind == NET_CLIENT_OPTIONS_KIND_NONE) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_NONE) { return 0; /* nothing to do */ } - if (opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net type"); return -1; } - if (!net_client_init_fun[opts->kind]) { + if (!net_client_init_fun[opts->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net backend type (maybe it is not compiled " "into this binary)"); @@ -1014,17 +1014,17 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) } /* Do not add to a vlan if it's a nic with a netdev= parameter. */ - if (opts->kind != NET_CLIENT_OPTIONS_KIND_NIC || - !opts->nic->has_netdev) { + if (opts->type != NET_CLIENT_OPTIONS_KIND_NIC || + !opts->u.nic->has_netdev) { peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL); } } - if (net_client_init_fun[opts->kind](opts, name, peer, errp) < 0) { + if (net_client_init_fun[opts->type](opts, name, peer, errp) < 0) { /* FIXME drop when all init functions store an Error */ if (errp && !*errp) { error_setg(errp, QERR_DEVICE_INIT_FAILED, - NetClientOptionsKind_lookup[opts->kind]); + NetClientOptionsKind_lookup[opts->type]); } return -1; } diff --git a/net/netmap.c b/net/netmap.c index 508b82947d..555836829e 100644 --- a/net/netmap.c +++ b/net/netmap.c @@ -90,7 +90,7 @@ pkt_copy(const void *_src, void *_dst, int l) * Open a netmap device. We assume there is only one queue * (which is the case for the VALE bridge). */ -static int netmap_open(NetmapPriv *me) +static void netmap_open(NetmapPriv *me, Error **errp) { int fd; int err; @@ -99,9 +99,8 @@ static int netmap_open(NetmapPriv *me) me->fd = fd = open(me->fdname, O_RDWR); if (fd < 0) { - error_report("Unable to open netmap device '%s' (%s)", - me->fdname, strerror(errno)); - return -1; + error_setg_file_open(errp, errno, me->fdname); + return; } memset(&req, 0, sizeof(req)); pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname); @@ -109,15 +108,14 @@ static int netmap_open(NetmapPriv *me) req.nr_version = NETMAP_API; err = ioctl(fd, NIOCREGIF, &req); if (err) { - error_report("Unable to register %s: %s", me->ifname, strerror(errno)); + error_setg_errno(errp, errno, "Unable to register %s", me->ifname); goto error; } l = me->memsize = req.nr_memsize; me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); if (me->mem == MAP_FAILED) { - error_report("Unable to mmap netmap shared memory: %s", - strerror(errno)); + error_setg_errno(errp, errno, "Unable to mmap netmap shared memory"); me->mem = NULL; goto error; } @@ -125,11 +123,11 @@ static int netmap_open(NetmapPriv *me) me->nifp = NETMAP_IF(me->mem, req.nr_offset); me->tx = NETMAP_TXRING(me->nifp, 0); me->rx = NETMAP_RXRING(me->nifp, 0); - return 0; + + return; error: close(me->fd); - return -1; } static void netmap_send(void *opaque); @@ -438,9 +436,9 @@ static NetClientInfo net_netmap_info = { int net_init_netmap(const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp) { - /* FIXME error_setg(errp, ...) on failure */ - const NetdevNetmapOptions *netmap_opts = opts->netmap; + const NetdevNetmapOptions *netmap_opts = opts->u.netmap; NetClientState *nc; + Error *err = NULL; NetmapPriv me; NetmapState *s; @@ -448,7 +446,9 @@ int net_init_netmap(const NetClientOptions *opts, netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap"); /* Set default name for the port if not supplied. */ pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname); - if (netmap_open(&me)) { + netmap_open(&me, &err); + if (err) { + error_propagate(errp, err); return -1; } /* Create the object. */ diff --git a/net/slirp.c b/net/slirp.c index 7657b38fdf..f505570adb 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -746,8 +746,8 @@ int net_init_slirp(const NetClientOptions *opts, const char *name, const NetdevUserOptions *user; const char **dnssearch; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_USER); - user = opts->user; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_USER); + user = opts->u.user; vnet = user->has_net ? g_strdup(user->net) : user->has_ip ? g_strdup_printf("%s/24", user->ip) : diff --git a/net/socket.c b/net/socket.c index b1e3b1c8d9..e8605d4ded 100644 --- a/net/socket.c +++ b/net/socket.c @@ -706,8 +706,8 @@ int net_init_socket(const NetClientOptions *opts, const char *name, Error *err = NULL; const NetdevSocketOptions *sock; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_SOCKET); - sock = opts->socket; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_SOCKET); + sock = opts->u.socket; if (sock->has_fd + sock->has_listen + sock->has_connect + sock->has_mcast + sock->has_udp != 1) { diff --git a/net/tap-bsd.c b/net/tap-bsd.c index 7028d9be95..0103a97bf6 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -109,8 +109,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, #define PATH_NET_TAP "/dev/tap" -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, - int vnet_hdr_required, int mq_required, Error **errp) +static int tap_open_clone(char *ifname, int ifname_size, Error **errp) { int fd, s, ret; struct ifreq ifr; @@ -126,7 +125,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, ret = ioctl(fd, TAPGIFNAME, (void *)&ifr); if (ret < 0) { error_setg_errno(errp, errno, "could not get tap interface name"); - goto error; + close(fd); + return -1; } if (ifname[0] != '\0') { @@ -135,19 +135,47 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, if (s < 0) { error_setg_errno(errp, errno, "could not open socket to set interface name"); - goto error; + close(fd); + return -1; } ifr.ifr_data = ifname; ret = ioctl(s, SIOCSIFNAME, (void *)&ifr); close(s); if (ret < 0) { error_setg(errp, "could not set tap interface name"); - goto error; + close(fd); + return -1; } } else { pstrcpy(ifname, ifname_size, ifr.ifr_name); } + return fd; +} + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + int fd = -1; + + /* If the specified tap device already exists just use it. */ + if (ifname[0] != '\0') { + char dname[100]; + snprintf(dname, sizeof dname, "/dev/%s", ifname); + TFR(fd = open(dname, O_RDWR)); + if (fd < 0 && errno != ENOENT) { + error_setg_errno(errp, errno, "could not open %s", dname); + return -1; + } + } + + if (fd < 0) { + /* Tap device not specified or does not exist. */ + if ((fd = tap_open_clone(ifname, ifname_size, errp)) < 0) { + return -1; + } + } + if (*vnet_hdr) { /* BSD doesn't have IFF_VNET_HDR */ *vnet_hdr = 0; diff --git a/net/tap-win32.c b/net/tap-win32.c index 625d53c64b..7fddb20b51 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -77,7 +77,12 @@ //#define DEBUG_TAP_WIN32 -#define TUN_ASYNCHRONOUS_WRITES 1 +/* FIXME: The asynch write path appears to be broken at + * present. WriteFile() ignores the lpNumberOfBytesWritten parameter + * for overlapped writes, with the result we return zero bytes sent, + * and after handling a single packet, receive is disabled for this + * interface. */ +/* #define TUN_ASYNCHRONOUS_WRITES 1 */ #define TUN_BUFFER_SIZE 1560 #define TUN_MAX_BUFFER_COUNT 32 @@ -356,7 +361,8 @@ static int get_device_guid( &len); if (status != ERROR_SUCCESS || name_type != REG_SZ) { - return -1; + ++i; + continue; } else { if (is_tap_win32_dev(enum_name)) { @@ -460,27 +466,48 @@ static int tap_win32_write(tap_win32_overlapped_t *overlapped, BOOL result; DWORD error; +#ifdef TUN_ASYNCHRONOUS_WRITES result = GetOverlappedResult( overlapped->handle, &overlapped->write_overlapped, &write_size, FALSE); if (!result && GetLastError() == ERROR_IO_INCOMPLETE) WaitForSingleObject(overlapped->write_event, INFINITE); +#endif result = WriteFile(overlapped->handle, buffer, size, &write_size, &overlapped->write_overlapped); +#ifdef TUN_ASYNCHRONOUS_WRITES + /* FIXME: we can't sensibly set write_size here, without waiting + * for the IO to complete! Moreover, we can't return zero, + * because that will disable receive on this interface, and we + * also can't assume it will succeed and return the full size, + * because that will result in the buffer being reclaimed while + * the IO is in progress. */ +#error Async writes are broken. Please disable TUN_ASYNCHRONOUS_WRITES. +#else /* !TUN_ASYNCHRONOUS_WRITES */ if (!result) { - switch (error = GetLastError()) - { - case ERROR_IO_PENDING: -#ifndef TUN_ASYNCHRONOUS_WRITES - WaitForSingleObject(overlapped->write_event, INFINITE); -#endif - break; - default: - return -1; + error = GetLastError(); + if (error == ERROR_IO_PENDING) { + result = GetOverlappedResult(overlapped->handle, + &overlapped->write_overlapped, + &write_size, TRUE); } } +#endif + + if (!result) { +#ifdef DEBUG_TAP_WIN32 + LPTSTR msgbuf; + error = GetLastError(); + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM, + NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + &msgbuf, 0, NULL); + fprintf(stderr, "Tap-Win32: Error WriteFile %d - %s\n", error, msgbuf); + LocalFree(msgbuf); +#endif + return 0; + } return write_size; } @@ -767,8 +794,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, /* FIXME error_setg(errp, ...) on failure */ const NetdevTapOptions *tap; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP); - tap = opts->tap; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); + tap = opts->u.tap; if (!tap->has_ifname) { error_report("tap: no interface name"); @@ -565,8 +565,8 @@ int net_init_bridge(const NetClientOptions *opts, const char *name, TAPState *s; int fd, vnet_hdr; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE); - bridge = opts->bridge; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE); + bridge = opts->u.bridge; helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; @@ -728,8 +728,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, const char *vhostfdname; char ifname[128]; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP); - tap = opts->tap; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); + tap = opts->u.tap; queues = tap->has_queues ? tap->queues : 1; vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL; @@ -115,8 +115,8 @@ int net_init_vde(const NetClientOptions *opts, const char *name, /* FIXME error_setg(errp, ...) on failure */ const NetdevVdeOptions *vde; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VDE); - vde = opts->vde; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_VDE); + vde = opts->u.vde; /* missing optional values have been initialized to "all bits zero" */ if (net_vde_init(peer, "vde", name, vde->sock, vde->port, vde->group, diff --git a/net/vhost-user.c b/net/vhost-user.c index 17b5c2a722..b368a90219 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -188,7 +188,7 @@ static void net_vhost_user_event(void *opaque, int event) qmp_set_link(name, true, &err); break; case CHR_EVENT_CLOSED: - qmp_set_link(name, true, &err); + qmp_set_link(name, false, &err); vhost_user_stop(queues, ncs); break; } @@ -301,8 +301,8 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name, const NetdevVhostUserOptions *vhost_user_opts; CharDriverState *chr; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER); - vhost_user_opts = opts->vhost_user; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + vhost_user_opts = opts->u.vhost_user; chr = net_vhost_parse_chardev(vhost_user_opts, errp); if (!chr) { @@ -316,6 +316,11 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name, } queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1; + if (queues < 1) { + error_setg(errp, + "vhost-user number of queues must be bigger than zero"); + return -1; + } return net_vhost_user_init(peer, "vhost_user", name, chr, queues); } @@ -226,9 +226,9 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) goto error; } - switch (object->kind) { + switch (object->type) { case NUMA_OPTIONS_KIND_NODE: - numa_node_parse(object->node, opts, &err); + numa_node_parse(object->u.node, opts, &err); if (err) { goto error; } @@ -487,9 +487,9 @@ static void numa_stat_memory_devices(uint64_t node_mem[]) MemoryDeviceInfo *value = info->value; if (value) { - switch (value->kind) { + switch (value->type) { case MEMORY_DEVICE_INFO_KIND_DIMM: - node_mem[value->dimm->node] += value->dimm->size; + node_mem[value->u.dimm->node] += value->u.dimm->size; break; default: break; diff --git a/pc-bios/README b/pc-bios/README index e4154ab9f0..d260c1bbbe 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -17,7 +17,7 @@ - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/aik/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20150813. + built from git tag qemu-slof-20151103. - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc Binary files differindex 540e45a74c..a419bfd031 100644 --- a/pc-bios/openbios-ppc +++ b/pc-bios/openbios-ppc diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32 Binary files differindex 0da11883bb..cf52787034 100644 --- a/pc-bios/openbios-sparc32 +++ b/pc-bios/openbios-sparc32 diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64 Binary files differindex 9bf3ce5450..08eecdf225 100644 --- a/pc-bios/openbios-sparc64 +++ b/pc-bios/openbios-sparc64 diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex 0c540a10cf..bd8f21050f 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile index 15e423274f..11c5dd4799 100644 --- a/pc-bios/s390-ccw/Makefile +++ b/pc-bios/s390-ccw/Makefile @@ -10,7 +10,7 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/s390-ccw) .PHONY : all clean build-all OBJECTS = start.o main.o bootmap.o sclp-ascii.o virtio.o -CFLAGS += -fPIE -fno-stack-protector -ffreestanding +CFLAGS += -fPIE -fno-stack-protector -ffreestanding -march=z900 CFLAGS += -fno-delete-null-pointer-checks -msoft-float LDFLAGS += -Wl,-pie -nostdlib diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index b678d5ebb8..415508b279 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -445,6 +445,206 @@ static void ipl_scsi(void) } /*********************************************************************** + * IPL El Torito ISO9660 image or DVD + */ + +static bool is_iso_bc_entry_compatible(IsoBcSection *s) +{ + uint8_t *magic_sec = (uint8_t *)(sec + ISO_SECTOR_SIZE); + + if (s->unused || !s->sector_count) { + return false; + } + read_iso_sector(bswap32(s->load_rba), magic_sec, + "Failed to read image sector 0"); + + /* Checking bytes 8 - 32 for S390 Linux magic */ + return !_memcmp(magic_sec + 8, linux_s390_magic, 24); +} + +/* Location of the current sector of the directory */ +static uint32_t sec_loc[ISO9660_MAX_DIR_DEPTH]; +/* Offset in the current sector of the directory */ +static uint32_t sec_offset[ISO9660_MAX_DIR_DEPTH]; +/* Remained directory space in bytes */ +static uint32_t dir_rem[ISO9660_MAX_DIR_DEPTH]; + +static inline uint32_t iso_get_file_size(uint32_t load_rba) +{ + IsoVolDesc *vd = (IsoVolDesc *)sec; + IsoDirHdr *cur_record = &vd->vd.primary.rootdir; + uint8_t *temp = sec + ISO_SECTOR_SIZE; + int level = 0; + + read_iso_sector(ISO_PRIMARY_VD_SECTOR, sec, + "Failed to read ISO primary descriptor"); + sec_loc[0] = iso_733_to_u32(cur_record->ext_loc); + dir_rem[0] = 0; + sec_offset[0] = 0; + + while (level >= 0) { + IPL_assert(sec_offset[level] <= ISO_SECTOR_SIZE, + "Directory tree structure violation"); + + cur_record = (IsoDirHdr *)(temp + sec_offset[level]); + + if (sec_offset[level] == 0) { + read_iso_sector(sec_loc[level], temp, + "Failed to read ISO directory"); + if (dir_rem[level] == 0) { + /* Skip self and parent records */ + dir_rem[level] = iso_733_to_u32(cur_record->data_len) - + cur_record->dr_len; + sec_offset[level] += cur_record->dr_len; + + cur_record = (IsoDirHdr *)(temp + sec_offset[level]); + dir_rem[level] -= cur_record->dr_len; + sec_offset[level] += cur_record->dr_len; + continue; + } + } + + if (!cur_record->dr_len || sec_offset[level] == ISO_SECTOR_SIZE) { + /* Zero-padding and/or the end of current sector */ + dir_rem[level] -= ISO_SECTOR_SIZE - sec_offset[level]; + sec_offset[level] = 0; + sec_loc[level]++; + } else { + /* The directory record is valid */ + if (load_rba == iso_733_to_u32(cur_record->ext_loc)) { + return iso_733_to_u32(cur_record->data_len); + } + + dir_rem[level] -= cur_record->dr_len; + sec_offset[level] += cur_record->dr_len; + + if (cur_record->file_flags & 0x2) { + /* Subdirectory */ + if (level == ISO9660_MAX_DIR_DEPTH - 1) { + sclp_print("ISO-9660 directory depth limit exceeded\n"); + } else { + level++; + sec_loc[level] = iso_733_to_u32(cur_record->ext_loc); + sec_offset[level] = 0; + dir_rem[level] = 0; + continue; + } + } + } + + if (dir_rem[level] == 0) { + /* Nothing remaining */ + level--; + read_iso_sector(sec_loc[level], temp, + "Failed to read ISO directory"); + } + } + + return 0; +} + +static void load_iso_bc_entry(IsoBcSection *load) +{ + IsoBcSection s = *load; + /* + * According to spec, extent for each file + * is padded and ISO_SECTOR_SIZE bytes aligned + */ + uint32_t blks_to_load = bswap16(s.sector_count) >> ET_SECTOR_SHIFT; + uint32_t real_size = iso_get_file_size(bswap32(s.load_rba)); + + if (real_size) { + /* Round up blocks to load */ + blks_to_load = (real_size + ISO_SECTOR_SIZE - 1) / ISO_SECTOR_SIZE; + sclp_print("ISO boot image size verified\n"); + } else { + sclp_print("ISO boot image size could not be verified\n"); + } + + read_iso_boot_image(bswap32(s.load_rba), + (void *)((uint64_t)bswap16(s.load_segment)), + blks_to_load); + + /* Trying to get PSW at zero address */ + if (*((uint64_t *)0) & IPL_PSW_MASK) { + jump_to_IPL_code((*((uint64_t *)0)) & 0x7fffffff); + } + + /* Try default linux start address */ + jump_to_IPL_code(KERN_IMAGE_START); +} + +static uint32_t find_iso_bc(void) +{ + IsoVolDesc *vd = (IsoVolDesc *)sec; + uint32_t block_num = ISO_PRIMARY_VD_SECTOR; + + if (virtio_read_many(block_num++, sec, 1)) { + /* If primary vd cannot be read, there is no boot catalog */ + return 0; + } + + while (is_iso_vd_valid(vd) && vd->type != VOL_DESC_TERMINATOR) { + if (vd->type == VOL_DESC_TYPE_BOOT) { + IsoVdElTorito *et = &vd->vd.boot; + + if (!_memcmp(&et->el_torito[0], el_torito_magic, 32)) { + return bswap32(et->bc_offset); + } + } + read_iso_sector(block_num++, sec, + "Failed to read ISO volume descriptor"); + } + + return 0; +} + +static IsoBcSection *find_iso_bc_entry(void) +{ + IsoBcEntry *e = (IsoBcEntry *)sec; + uint32_t offset = find_iso_bc(); + int i; + + if (!offset) { + return NULL; + } + + read_iso_sector(offset, sec, "Failed to read El Torito boot catalog"); + + if (!is_iso_bc_valid(e)) { + /* The validation entry is mandatory */ + virtio_panic("No valid boot catalog found!\n"); + return NULL; + } + + /* + * Each entry has 32 bytes size, so one sector cannot contain > 64 entries. + * We consider only boot catalogs with no more than 64 entries. + */ + for (i = 1; i < ISO_BC_ENTRY_PER_SECTOR; i++) { + if (e[i].id == ISO_BC_BOOTABLE_SECTION) { + if (is_iso_bc_entry_compatible(&e[i].body.sect)) { + return &e[i].body.sect; + } + } + } + + virtio_panic("No suitable boot entry found on ISO-9660 media!\n"); + + return NULL; +} + +static void ipl_iso_el_torito(void) +{ + IsoBcSection *s = find_iso_bc_entry(); + + if (s) { + load_iso_bc_entry(s); + /* no return */ + } +} + +/*********************************************************************** * IPL starts here */ @@ -463,6 +663,12 @@ void zipl_load(void) ipl_scsi(); /* no return */ } + /* Check if we can boot as ISO media */ + if (virtio_guessed_disk_nature()) { + virtio_assume_iso9660(); + } + ipl_iso_el_torito(); + /* We have failed to follow the SCSI scheme, so */ if (virtio_guessed_disk_nature()) { sclp_print("Using guessed DASD geometry.\n"); diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h index ab132e3579..f98765b841 100644 --- a/pc-bios/s390-ccw/bootmap.h +++ b/pc-bios/s390-ccw/bootmap.h @@ -341,4 +341,210 @@ static inline bool magic_match(const void *data, const void *magic) return *((uint32_t *)data) == *((uint32_t *)magic); } +static inline int _memcmp(const void *s1, const void *s2, size_t n) +{ + int i; + const uint8_t *p1 = s1, *p2 = s2; + + for (i = 0; i < n; i++) { + if (p1[i] != p2[i]) { + return p1[i] > p2[i] ? 1 : -1; + } + } + + return 0; +} + +/* from include/qemu/bswap.h */ + +/* El Torito is always little-endian */ +static inline uint16_t bswap16(uint16_t x) +{ + return ((x & 0x00ff) << 8) | ((x & 0xff00) >> 8); +} + +static inline uint32_t bswap32(uint32_t x) +{ + return ((x & 0x000000ffU) << 24) | ((x & 0x0000ff00U) << 8) | + ((x & 0x00ff0000U) >> 8) | ((x & 0xff000000U) >> 24); +} + +static inline uint64_t bswap64(uint64_t x) +{ + return ((x & 0x00000000000000ffULL) << 56) | + ((x & 0x000000000000ff00ULL) << 40) | + ((x & 0x0000000000ff0000ULL) << 24) | + ((x & 0x00000000ff000000ULL) << 8) | + ((x & 0x000000ff00000000ULL) >> 8) | + ((x & 0x0000ff0000000000ULL) >> 24) | + ((x & 0x00ff000000000000ULL) >> 40) | + ((x & 0xff00000000000000ULL) >> 56); +} + +static inline uint32_t iso_733_to_u32(uint64_t x) +{ + return (uint32_t)x; +} + +#define ISO_SECTOR_SIZE 2048 +/* El Torito specifies boot image size in 512 byte blocks */ +#define ET_SECTOR_SHIFT 2 +#define KERN_IMAGE_START 0x010000UL +#define PSW_MASK_64 0x0000000100000000ULL +#define PSW_MASK_32 0x0000000080000000ULL +#define IPL_PSW_MASK (PSW_MASK_32 | PSW_MASK_64) + +#define ISO_PRIMARY_VD_SECTOR 16 + +static inline void read_iso_sector(uint32_t block_offset, void *buf, + const char *errmsg) +{ + IPL_assert(virtio_read_many(block_offset, buf, 1) == 0, errmsg); +} + +static inline void read_iso_boot_image(uint32_t block_offset, void *load_addr, + uint32_t blks_to_load) +{ + IPL_assert(virtio_read_many(block_offset, load_addr, blks_to_load) == 0, + "Failed to read boot image!"); +} + +const uint8_t el_torito_magic[] = "EL TORITO SPECIFICATION" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + +#define ISO9660_MAX_DIR_DEPTH 8 + +typedef struct IsoDirHdr { + uint8_t dr_len; + uint8_t ear_len; + uint64_t ext_loc; + uint64_t data_len; + uint8_t recording_datetime[7]; + uint8_t file_flags; + uint8_t file_unit_size; + uint8_t gap_size; + uint32_t vol_seqnum; + uint8_t fileid_len; +} __attribute__((packed)) IsoDirHdr; + +typedef struct IsoVdElTorito { + uint8_t el_torito[32]; /* must contain el_torito_magic value */ + uint8_t unused0[32]; + uint32_t bc_offset; + uint8_t unused1[1974]; +} __attribute__((packed)) IsoVdElTorito; + +typedef struct IsoVdPrimary { + uint8_t unused1; + uint8_t sys_id[32]; + uint8_t vol_id[32]; + uint8_t unused2[8]; + uint64_t vol_space_size; + uint8_t unused3[32]; + uint32_t vol_set_size; + uint32_t vol_seqnum; + uint32_t log_block_size; + uint64_t path_table_size; + uint32_t l_path_table; + uint32_t opt_l_path_table; + uint32_t m_path_table; + uint32_t opt_m_path_table; + IsoDirHdr rootdir; + uint8_t root_null; + uint8_t reserved2[1858]; +} __attribute__((packed)) IsoVdPrimary; + +typedef struct IsoVolDesc { + uint8_t type; + uint8_t ident[5]; + uint8_t version; + union { + IsoVdElTorito boot; + IsoVdPrimary primary; + } vd; +} __attribute__((packed)) IsoVolDesc; + +const uint8_t vol_desc_magic[] = "CD001"; +#define VOL_DESC_TYPE_BOOT 0 +#define VOL_DESC_TYPE_PRIMARY 1 +#define VOL_DESC_TYPE_SUPPLEMENT 2 +#define VOL_DESC_TYPE_PARTITION 3 +#define VOL_DESC_TERMINATOR 255 + +static inline bool is_iso_vd_valid(IsoVolDesc *vd) +{ + return !_memcmp(&vd->ident[0], vol_desc_magic, 5) && + vd->version == 0x1 && + vd->type <= VOL_DESC_TYPE_PARTITION; +} + +typedef struct IsoBcValid { + uint8_t platform_id; + uint16_t reserved; + uint8_t id[24]; + uint16_t checksum; + uint8_t key[2]; +} __attribute__((packed)) IsoBcValid; + +typedef struct IsoBcSection { + uint8_t boot_type; + uint16_t load_segment; + uint8_t sys_type; + uint8_t unused; + uint16_t sector_count; + uint32_t load_rba; + uint8_t selection[20]; +} __attribute__((packed)) IsoBcSection; + +typedef struct IsoBcHdr { + uint8_t platform_id; + uint16_t sect_num; + uint8_t id[28]; +} __attribute__((packed)) IsoBcHdr; + +/* + * Match two CCWs located after PSW and eight filler bytes. + * From libmagic and arch/s390/kernel/head.S. + */ +const uint8_t linux_s390_magic[] = "\x02\x00\x00\x18\x60\x00\x00\x50\x02\x00" + "\x00\x68\x60\x00\x00\x50\x40\x40\x40\x40" + "\x40\x40\x40\x40"; + +typedef struct IsoBcEntry { + uint8_t id; + union { + IsoBcValid valid; /* id == 0x01 */ + IsoBcSection sect; /* id == 0x88 || id == 0x0 */ + IsoBcHdr hdr; /* id == 0x90 || id == 0x91 */ + } body; +} __attribute__((packed)) IsoBcEntry; + +#define ISO_BC_ENTRY_PER_SECTOR (ISO_SECTOR_SIZE / sizeof(IsoBcEntry)) +#define ISO_BC_HDR_VALIDATION 0x01 +#define ISO_BC_BOOTABLE_SECTION 0x88 +#define ISO_BC_MAGIC_55 0x55 +#define ISO_BC_MAGIC_AA 0xaa +#define ISO_BC_PLATFORM_X86 0x0 +#define ISO_BC_PLATFORM_PPC 0x1 +#define ISO_BC_PLATFORM_MAC 0x2 + +static inline bool is_iso_bc_valid(IsoBcEntry *e) +{ + IsoBcValid *v = &e->body.valid; + + if (e->id != ISO_BC_HDR_VALIDATION) { + return false; + } + + if (v->platform_id != ISO_BC_PLATFORM_X86 && + v->platform_id != ISO_BC_PLATFORM_PPC && + v->platform_id != ISO_BC_PLATFORM_MAC) { + return false; + } + + return v->key[0] == ISO_BC_MAGIC_55 && + v->key[1] == ISO_BC_MAGIC_AA && + v->reserved == 0x0; +} + #endif /* _PC_BIOS_S390_CCW_BOOTMAP_H */ diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index 57ff1b07ee..87aed38a95 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -278,6 +278,13 @@ void virtio_assume_scsi(void) blk_cfg.physical_block_exp = 0; } +void virtio_assume_iso9660(void) +{ + guessed_disk_nature = true; + blk_cfg.blk_size = 2048; + blk_cfg.physical_block_exp = 0; +} + void virtio_assume_eckd(void) { guessed_disk_nature = true; diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h index c23466b8db..afa01a885b 100644 --- a/pc-bios/s390-ccw/virtio.h +++ b/pc-bios/s390-ccw/virtio.h @@ -187,6 +187,7 @@ typedef struct VirtioBlkConfig { bool virtio_guessed_disk_nature(void); void virtio_assume_scsi(void); void virtio_assume_eckd(void); +void virtio_assume_iso9660(void); extern bool virtio_disk_is_scsi(void); extern bool virtio_disk_is_eckd(void); @@ -199,14 +200,9 @@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num); #define VIRTIO_SECTOR_SIZE 512 -static inline ulong virtio_eckd_sector_adjust(ulong sector) -{ - return sector * (virtio_get_block_size() / VIRTIO_SECTOR_SIZE); -} - static inline ulong virtio_sector_adjust(ulong sector) { - return virtio_disk_is_eckd() ? virtio_eckd_sector_adjust(sector) : sector; + return sector * (virtio_get_block_size() / VIRTIO_SECTOR_SIZE); } #endif /* VIRTIO_H */ diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin Binary files differindex 701933f7dc..90f30996f9 100644 --- a/pc-bios/slof.bin +++ b/pc-bios/slof.bin diff --git a/qapi-schema.json b/qapi-schema.json index f60be2950c..8b1a423fa7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -430,6 +430,8 @@ # # @active: in the process of doing migration. # +# @postcopy-active: like active, but now in postcopy mode. (since 2.5) +# # @completed: migration is finished. # # @failed: some error occurred during migration process. @@ -439,7 +441,7 @@ ## { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', - 'active', 'completed', 'failed' ] } + 'active', 'postcopy-active', 'completed', 'failed' ] } ## # @MigrationInfo @@ -540,11 +542,15 @@ # @auto-converge: If enabled, QEMU will automatically throttle down the guest # to speed up convergence of RAM migration. (since 1.6) # +# @x-postcopy-ram: Start executing on the migration target before all of RAM has +# been migrated, pulling the remaining pages along as needed. NOTE: If +# the migration fails during postcopy the VM will fail. (since 2.5) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', - 'compress', 'events'] } + 'compress', 'events', 'x-postcopy-ram'] } ## # @MigrationCapabilityStatus @@ -698,6 +704,16 @@ '*tls-port': 'int', '*cert-subject': 'str' } } ## +# @migrate-start-postcopy +# +# Followup to a migration command to switch the migration to postcopy mode. +# The x-postcopy-ram capability must be set before the original migration +# command. +# +# Since: 2.5 +{ 'command': 'migrate-start-postcopy' } + +## # @MouseInfo: # # Information about a mouse device. @@ -1520,6 +1536,26 @@ 'data': { } } ## +# @ActionCompletionMode +# +# An enumeration of Transactional completion modes. +# +# @individual: Do not attempt to cancel any other Actions if any Actions fail +# after the Transaction request succeeds. All Actions that +# can complete successfully will do so without waiting on others. +# This is the default. +# +# @grouped: If any Action fails after the Transaction succeeds, cancel all +# Actions. Actions do not complete until all Actions are ready to +# complete. May be rejected by Actions that do not support this +# completion mode. +# +# Since: 2.5 +## +{ 'enum': 'ActionCompletionMode', + 'data': [ 'individual', 'grouped' ] } + +## # @TransactionAction # # A discriminated record of operations that can be performed with @@ -1531,25 +1567,52 @@ # abort since 1.6 # blockdev-snapshot-internal-sync since 1.7 # blockdev-backup since 2.3 +# blockdev-snapshot since 2.5 +# block-dirty-bitmap-add since 2.5 +# block-dirty-bitmap-clear since 2.5 ## { 'union': 'TransactionAction', 'data': { - 'blockdev-snapshot-sync': 'BlockdevSnapshot', + 'blockdev-snapshot': 'BlockdevSnapshot', + 'blockdev-snapshot-sync': 'BlockdevSnapshotSync', 'drive-backup': 'DriveBackup', 'blockdev-backup': 'BlockdevBackup', 'abort': 'Abort', - 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal' + 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal', + 'block-dirty-bitmap-add': 'BlockDirtyBitmapAdd', + 'block-dirty-bitmap-clear': 'BlockDirtyBitmap' } } ## +# @TransactionProperties +# +# Optional arguments to modify the behavior of a Transaction. +# +# @completion-mode: #optional Controls how jobs launched asynchronously by +# Actions will complete or fail as a group. +# See @ActionCompletionMode for details. +# +# Since: 2.5 +## +{ 'struct': 'TransactionProperties', + 'data': { + '*completion-mode': 'ActionCompletionMode' + } +} + +## # @transaction # # Executes a number of transactionable QMP commands atomically. If any # operation fails, then the entire set of actions will be abandoned and the # appropriate error returned. # -# List of: -# @TransactionAction: information needed for the respective operation +# @actions: List of @TransactionAction; +# information needed for the respective operations. +# +# @properties: #optional structure of additional options to control the +# execution of the transaction. See @TransactionProperties +# for additional detail. # # Returns: nothing on success # Errors depend on the operations of the transaction @@ -1561,7 +1624,10 @@ # Since 1.1 ## { 'command': 'transaction', - 'data': { 'actions': [ 'TransactionAction' ] } } + 'data': { 'actions': [ 'TransactionAction' ], + '*properties': 'TransactionProperties' + } +} ## # @human-monitor-command: @@ -1842,8 +1908,10 @@ # device's password. The behavior of reads and writes to the block # device between when these calls are executed is undefined. # -# Notes: It is strongly recommended that this interface is not used especially -# for changing block devices. +# Notes: This interface is deprecated, and it is strongly recommended that you +# avoid using it. For changing block devices, use +# blockdev-change-medium; for changing VNC parameters, use +# change-vnc-password. # # Since: 0.14.0 ## @@ -2614,9 +2682,7 @@ # # @host: host part of the address # -# @port: port part of the address, or lowest port if @to is present. -# Kernel selects a free port if omitted for listener addresses. -# #optional +# @port: port part of the address, or lowest port if @to is present # # @to: highest port to try # @@ -2631,7 +2697,7 @@ { 'struct': 'InetSocketAddress', 'data': { 'host': 'str', - '*port': 'str', + 'port': 'str', '*to': 'uint16', '*ipv4': 'bool', '*ipv6': 'bool' } } @@ -3513,16 +3579,22 @@ # Button of a pointer input device (mouse, tablet). # # Since: 2.0 +# +# Note that the spelling of these values may change when the +# x-input-send-event is promoted out of experimental status. ## { 'enum' : 'InputButton', 'data' : [ 'Left', 'Middle', 'Right', 'WheelUp', 'WheelDown' ] } ## -# @InputButton +# @InputAxis # # Position axis of a pointer input device (mouse, tablet). # # Since: 2.0 +# +# Note that the spelling of these values may change when the +# x-input-send-event is promoted out of experimental status. ## { 'enum' : 'InputAxis', 'data' : [ 'X', 'Y' ] } @@ -3613,7 +3685,10 @@ # # Since: 2.2 # -# Note: this command is experimental, and not a stable API. +# Note: this command is experimental, and not a stable API. Things that +# might change before it becomes stable include the spelling of enum +# values for InputButton and InputAxis, and the notion of how to designate +# which console will receive the event. # ## { 'command': 'x-input-send-event', @@ -3878,3 +3953,21 @@ # Rocker ethernet network switch { 'include': 'qapi/rocker.json' } + +## +# ReplayMode: +# +# Mode of the replay subsystem. +# +# @none: normal execution mode. Replay or record are not enabled. +# +# @record: record mode. All non-deterministic data is written into the +# replay log. +# +# @play: replay mode. Non-deterministic data required for system execution +# is read from the log. +# +# Since: 2.5 +## +{ 'enum': 'ReplayMode', + 'data': [ 'none', 'record', 'play' ] } diff --git a/qapi/block-core.json b/qapi/block-core.json index 425fdab706..a07b13f54a 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -414,6 +414,59 @@ ## { 'command': 'query-block', 'returns': ['BlockInfo'] } + +## +# @BlockDeviceTimedStats: +# +# Statistics of a block device during a given interval of time. +# +# @interval_length: Interval used for calculating the statistics, +# in seconds. +# +# @min_rd_latency_ns: Minimum latency of read operations in the +# defined interval, in nanoseconds. +# +# @min_wr_latency_ns: Minimum latency of write operations in the +# defined interval, in nanoseconds. +# +# @min_flush_latency_ns: Minimum latency of flush operations in the +# defined interval, in nanoseconds. +# +# @max_rd_latency_ns: Maximum latency of read operations in the +# defined interval, in nanoseconds. +# +# @max_wr_latency_ns: Maximum latency of write operations in the +# defined interval, in nanoseconds. +# +# @max_flush_latency_ns: Maximum latency of flush operations in the +# defined interval, in nanoseconds. +# +# @avg_rd_latency_ns: Average latency of read operations in the +# defined interval, in nanoseconds. +# +# @avg_wr_latency_ns: Average latency of write operations in the +# defined interval, in nanoseconds. +# +# @avg_flush_latency_ns: Average latency of flush operations in the +# defined interval, in nanoseconds. +# +# @avg_rd_queue_depth: Average number of pending read operations +# in the defined interval. +# +# @avg_wr_queue_depth: Average number of pending write operations +# in the defined interval. +# +# Since: 2.5 +## + +{ 'struct': 'BlockDeviceTimedStats', + 'data': { 'interval_length': 'int', 'min_rd_latency_ns': 'int', + 'max_rd_latency_ns': 'int', 'avg_rd_latency_ns': 'int', + 'min_wr_latency_ns': 'int', 'max_wr_latency_ns': 'int', + 'avg_wr_latency_ns': 'int', 'min_flush_latency_ns': 'int', + 'max_flush_latency_ns': 'int', 'avg_flush_latency_ns': 'int', + 'avg_rd_queue_depth': 'number', 'avg_wr_queue_depth': 'number' } } + ## # @BlockDeviceStats: # @@ -448,6 +501,37 @@ # @wr_merged: Number of write requests that have been merged into another # request (Since 2.3). # +# @idle_time_ns: #optional Time since the last I/O operation, in +# nanoseconds. If the field is absent it means that +# there haven't been any operations yet (Since 2.5). +# +# @failed_rd_operations: The number of failed read operations +# performed by the device (Since 2.5) +# +# @failed_wr_operations: The number of failed write operations +# performed by the device (Since 2.5) +# +# @failed_flush_operations: The number of failed flush operations +# performed by the device (Since 2.5) +# +# @invalid_rd_operations: The number of invalid read operations +# performed by the device (Since 2.5) +# +# @invalid_wr_operations: The number of invalid write operations +# performed by the device (Since 2.5) +# +# @invalid_flush_operations: The number of invalid flush operations +# performed by the device (Since 2.5) +# +# @account_invalid: Whether invalid operations are included in the +# last access statistics (Since 2.5) +# +# @account_failed: Whether failed operations are included in the +# latency and last access statistics (Since 2.5) +# +# @timed_stats: Statistics specific to the set of previously defined +# intervals of time (Since 2.5) +# # Since: 0.14.0 ## { 'struct': 'BlockDeviceStats', @@ -455,7 +539,12 @@ 'wr_operations': 'int', 'flush_operations': 'int', 'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int', 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int', - 'rd_merged': 'int', 'wr_merged': 'int' } } + 'rd_merged': 'int', 'wr_merged': 'int', '*idle_time_ns': 'int', + 'failed_rd_operations': 'int', 'failed_wr_operations': 'int', + 'failed_flush_operations': 'int', 'invalid_rd_operations': 'int', + 'invalid_wr_operations': 'int', 'invalid_flush_operations': 'int', + 'account_invalid': 'bool', 'account_failed': 'bool', + 'timed_stats': ['BlockDeviceTimedStats'] } } ## # @BlockStats: @@ -682,7 +771,7 @@ 'data': [ 'existing', 'absolute-paths' ] } ## -# @BlockdevSnapshot +# @BlockdevSnapshotSync # # Either @device or @node-name must be set but not both. # @@ -699,12 +788,27 @@ # @mode: #optional whether and how QEMU should create a new image, default is # 'absolute-paths'. ## -{ 'struct': 'BlockdevSnapshot', +{ 'struct': 'BlockdevSnapshotSync', 'data': { '*device': 'str', '*node-name': 'str', 'snapshot-file': 'str', '*snapshot-node-name': 'str', '*format': 'str', '*mode': 'NewImageMode' } } ## +# @BlockdevSnapshot +# +# @node: device or node name that will have a snapshot created. +# +# @overlay: reference to the existing block device that will become +# the overlay of @node, as part of creating the snapshot. +# It must not have a current backing file (this can be +# achieved by passing "backing": "" to blockdev-add). +# +# Since 2.5 +## +{ 'struct': 'BlockdevSnapshot', + 'data': { 'node': 'str', 'overlay': 'str' } } + +## # @DriveBackup # # @device: the name of the device which should be copied. @@ -790,7 +894,7 @@ # # Generates a synchronous snapshot of a block device. # -# For the arguments, see the documentation of BlockdevSnapshot. +# For the arguments, see the documentation of BlockdevSnapshotSync. # # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound @@ -798,6 +902,19 @@ # Since 0.14.0 ## { 'command': 'blockdev-snapshot-sync', + 'data': 'BlockdevSnapshotSync' } + + +## +# @blockdev-snapshot +# +# Generates a snapshot of a block device. +# +# For the arguments, see the documentation of BlockdevSnapshot. +# +# Since 2.5 +## +{ 'command': 'blockdev-snapshot', 'data': 'BlockdevSnapshot' } ## @@ -1408,6 +1525,14 @@ # (default: enospc) # @read-only: #optional whether the block device should be read-only # (default: false) +# @stats-account-invalid: #optional whether to include invalid +# operations when computing last access statistics +# (default: true) (Since 2.5) +# @stats-account-failed: #optional whether to include failed +# operations when computing latency and last +# access statistics (default: true) (Since 2.5) +# @stats-intervals: #optional list of intervals for collecting I/O +# statistics, in seconds (default: none) (Since 2.5) # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1) # (default: off) # @@ -1423,6 +1548,9 @@ '*rerror': 'BlockdevOnError', '*werror': 'BlockdevOnError', '*read-only': 'bool', + '*stats-account-invalid': 'bool', + '*stats-account-failed': 'bool', + '*stats-intervals': ['int'], '*detect-zeroes': 'BlockdevDetectZeroesOptions' } } ## @@ -1867,8 +1995,8 @@ # level and no BlockBackend will be created. # # This command is still a work in progress. It doesn't support all -# block drivers, it lacks a matching blockdev-del, and more. Stay -# away from it unless you want to help with its development. +# block drivers among other things. Stay away from it unless you want +# to help with its development. # # @options: block device options for the new device # @@ -1876,6 +2004,160 @@ ## { 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } } +## +# @x-blockdev-del: +# +# Deletes a block device that has been added using blockdev-add. +# The selected device can be either a block backend or a graph node. +# +# In the former case the backend will be destroyed, along with its +# inserted medium if there's any. The command will fail if the backend +# or its medium are in use. +# +# In the latter case the node will be destroyed. The command will fail +# if the node is attached to a block backend or is otherwise being +# used. +# +# One of @id or @node-name must be specified, but not both. +# +# This command is still a work in progress and is considered +# experimental. Stay away from it unless you want to help with its +# development. +# +# @id: #optional Name of the block backend device to delete. +# +# @node-name: #optional Name of the graph node to delete. +# +# Since: 2.5 +## +{ 'command': 'x-blockdev-del', 'data': { '*id': 'str', '*node-name': 'str' } } + +## +# @blockdev-open-tray: +# +# Opens a block device's tray. If there is a block driver state tree inserted as +# a medium, it will become inaccessible to the guest (but it will remain +# associated to the block device, so closing the tray will make it accessible +# again). +# +# If the tray was already open before, this will be a no-op. +# +# Once the tray opens, a DEVICE_TRAY_MOVED event is emitted. There are cases in +# which no such event will be generated, these include: +# - if the guest has locked the tray, @force is false and the guest does not +# respond to the eject request +# - if the BlockBackend denoted by @device does not have a guest device attached +# to it +# - if the guest device does not have an actual tray and is empty, for instance +# for floppy disk drives +# +# @device: block device name +# +# @force: #optional if false (the default), an eject request will be sent to +# the guest if it has locked the tray (and the tray will not be opened +# immediately); if true, the tray will be opened regardless of whether +# it is locked +# +# Since: 2.5 +## +{ 'command': 'blockdev-open-tray', + 'data': { 'device': 'str', + '*force': 'bool' } } + +## +# @blockdev-close-tray: +# +# Closes a block device's tray. If there is a block driver state tree associated +# with the block device (which is currently ejected), that tree will be loaded +# as the medium. +# +# If the tray was already closed before, this will be a no-op. +# +# @device: block device name +# +# Since: 2.5 +## +{ 'command': 'blockdev-close-tray', + 'data': { 'device': 'str' } } + +## +# @blockdev-remove-medium: +# +# Removes a medium (a block driver state tree) from a block device. That block +# device's tray must currently be open (unless there is no attached guest +# device). +# +# If the tray is open and there is no medium inserted, this will be a no-op. +# +# @device: block device name +# +# Since: 2.5 +## +{ 'command': 'blockdev-remove-medium', + 'data': { 'device': 'str' } } + +## +# @blockdev-insert-medium: +# +# Inserts a medium (a block driver state tree) into a block device. That block +# device's tray must currently be open (unless there is no attached guest +# device) and there must be no medium inserted already. +# +# @device: block device name +# +# @node-name: name of a node in the block driver state graph +# +# Since: 2.5 +## +{ 'command': 'blockdev-insert-medium', + 'data': { 'device': 'str', + 'node-name': 'str'} } + + +## +# @BlockdevChangeReadOnlyMode: +# +# Specifies the new read-only mode of a block device subject to the +# @blockdev-change-medium command. +# +# @retain: Retains the current read-only mode +# +# @read-only: Makes the device read-only +# +# @read-write: Makes the device writable +# +# Since: 2.3 +## +{ 'enum': 'BlockdevChangeReadOnlyMode', + 'data': ['retain', 'read-only', 'read-write'] } + + +## +# @blockdev-change-medium: +# +# Changes the medium inserted into a block device by ejecting the current medium +# and loading a new image file which is inserted as the new medium (this command +# combines blockdev-open-tray, blockdev-remove-medium, blockdev-insert-medium +# and blockdev-close-tray). +# +# @device: block device name +# +# @filename: filename of the new image to be loaded +# +# @format: #optional, format to open the new image with (defaults to +# the probed format) +# +# @read-only-mode: #optional, change the read-only mode of the device; defaults +# to 'retain' +# +# Since: 2.5 +## +{ 'command': 'blockdev-change-medium', + 'data': { 'device': 'str', + 'filename': 'str', + '*format': 'str', + '*read-only-mode': 'BlockdevChangeReadOnlyMode' } } + ## # @BlockErrorAction diff --git a/qapi/introspect.json b/qapi/introspect.json index cc50dc6bcb..9e9369e160 100644 --- a/qapi/introspect.json +++ b/qapi/introspect.json @@ -22,9 +22,22 @@ # what's there), not interface specification. The specification is in # the QAPI schema. # +# Furthermore, while we strive to keep the QMP wire format +# backwards-compatible across qemu versions, the introspection output +# is not guaranteed to have the same stability. For example, one +# version of qemu may list an object member as an optional +# non-variant, while another lists the same member only through the +# object's variants; or the type of a member may change from a generic +# string into a specific enum or from one specific type into an +# alternate that includes the original type alongside something else. +# # Returns: array of @SchemaInfo, where each element describes an # entity in the ABI: command, event, type, ... # +# The order of the various SchemaInfo is unspecified; however, all +# names are guaranteed to be unique (no name will be duplicated with +# different meta-types). +# # Note: the QAPI schema is also used to help define *internal* # interfaces, by defining QAPI types. These are not part of the QMP # wire ABI, and therefore not returned by this command. @@ -78,7 +91,8 @@ # Commands and events have the name defined in the QAPI schema. # Unlike command and event names, type names are not part of # the wire ABI. Consequently, type names are meaningless -# strings here. +# strings here, although they are still guaranteed unique +# regardless of @meta-type. # # All references to other SchemaInfo are by name. # @@ -130,7 +144,7 @@ # # Additional SchemaInfo members for meta-type 'enum'. # -# @values: the enumeration type's values. +# @values: the enumeration type's values, in no particular order. # # Values of this type are JSON string on the wire. # @@ -158,14 +172,16 @@ # # Additional SchemaInfo members for meta-type 'object'. # -# @members: the object type's (non-variant) members. +# @members: the object type's (non-variant) members, in no particular order. # # @tag: #optional the name of the member serving as type tag. # An element of @members with this name must exist. # # @variants: #optional variant members, i.e. additional members that # depend on the type tag's value. Present exactly when -# @tag is present. +# @tag is present. The variants are in no particular order, +# and may even differ from the order of the values of the +# enum type of the @tag. # # Values of this type are JSON object on the wire. # @@ -219,7 +235,7 @@ # # Additional SchemaInfo members for meta-type 'alternate'. # -# @members: the alternate type's members. +# @members: the alternate type's members, in no particular order. # The members' wire encoding is distinct, see # docs/qapi-code-gen.txt section Alternate types. # diff --git a/qapi/qmp-input-visitor.c b/qapi/qmp-input-visitor.c index 5dd9ed5ce5..eb6e110300 100644 --- a/qapi/qmp-input-visitor.c +++ b/qapi/qmp-input-visitor.c @@ -225,45 +225,45 @@ static void qmp_input_type_int(Visitor *v, int64_t *obj, const char *name, Error **errp) { QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, true); + QInt *qint = qobject_to_qint(qmp_input_get_object(qiv, name, true)); - if (!qobj || qobject_type(qobj) != QTYPE_QINT) { + if (!qint) { error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", "integer"); return; } - *obj = qint_get_int(qobject_to_qint(qobj)); + *obj = qint_get_int(qint); } static void qmp_input_type_bool(Visitor *v, bool *obj, const char *name, Error **errp) { QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, true); + QBool *qbool = qobject_to_qbool(qmp_input_get_object(qiv, name, true)); - if (!qobj || qobject_type(qobj) != QTYPE_QBOOL) { + if (!qbool) { error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", "boolean"); return; } - *obj = qbool_get_bool(qobject_to_qbool(qobj)); + *obj = qbool_get_bool(qbool); } static void qmp_input_type_str(Visitor *v, char **obj, const char *name, Error **errp) { QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, true); + QString *qstr = qobject_to_qstring(qmp_input_get_object(qiv, name, true)); - if (!qobj || qobject_type(qobj) != QTYPE_QSTRING) { + if (!qstr) { error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", "string"); return; } - *obj = g_strdup(qstring_get_str(qobject_to_qstring(qobj))); + *obj = g_strdup(qstring_get_str(qstr)); } static void qmp_input_type_number(Visitor *v, double *obj, const char *name, @@ -271,19 +271,23 @@ static void qmp_input_type_number(Visitor *v, double *obj, const char *name, { QmpInputVisitor *qiv = to_qiv(v); QObject *qobj = qmp_input_get_object(qiv, name, true); + QInt *qint; + QFloat *qfloat; - if (!qobj || (qobject_type(qobj) != QTYPE_QFLOAT && - qobject_type(qobj) != QTYPE_QINT)) { - error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", - "number"); + qint = qobject_to_qint(qobj); + if (qint) { + *obj = qint_get_int(qobject_to_qint(qobj)); return; } - if (qobject_type(qobj) == QTYPE_QINT) { - *obj = qint_get_int(qobject_to_qint(qobj)); - } else { + qfloat = qobject_to_qfloat(qobj); + if (qfloat) { *obj = qfloat_get_double(qobject_to_qfloat(qobj)); + return; } + + error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", + "number"); } static void qmp_input_type_any(Visitor *v, QObject **obj, const char *name, diff --git a/qemu-char.c b/qemu-char.c index c4eb4eea31..2969c44e84 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -97,18 +97,18 @@ static int SocketAddress_to_str(char *dest, int max_len, const char *prefix, SocketAddress *addr, bool is_listen, bool is_telnet) { - switch (addr->kind) { + switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: return snprintf(dest, max_len, "%s%s:%s:%s%s", prefix, - is_telnet ? "telnet" : "tcp", addr->inet->host, - addr->inet->port, is_listen ? ",server" : ""); + is_telnet ? "telnet" : "tcp", addr->u.inet->host, + addr->u.inet->port, is_listen ? ",server" : ""); break; case SOCKET_ADDRESS_KIND_UNIX: return snprintf(dest, max_len, "%sunix:%s%s", prefix, - addr->q_unix->path, is_listen ? ",server" : ""); + addr->u.q_unix->path, is_listen ? ",server" : ""); break; case SOCKET_ADDRESS_KIND_FD: - return snprintf(dest, max_len, "%sfd:%s%s", prefix, addr->fd->str, + return snprintf(dest, max_len, "%sfd:%s%s", prefix, addr->u.fd->str, is_listen ? ",server" : ""); break; default: @@ -661,7 +661,7 @@ static CharDriverState *qemu_chr_open_mux(const char *id, ChardevBackend *backend, ChardevReturn *ret, Error **errp) { - ChardevMux *mux = backend->mux; + ChardevMux *mux = backend->u.mux; CharDriverState *chr, *drv; MuxDriver *d; @@ -1070,7 +1070,7 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, ChardevReturn *ret, Error **errp) { - ChardevHostdev *opts = backend->pipe; + ChardevHostdev *opts = backend->u.pipe; int fd_in, fd_out; char filename_in[CHR_MAX_FILENAME_SIZE]; char filename_out[CHR_MAX_FILENAME_SIZE]; @@ -1148,7 +1148,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, ChardevReturn *ret, Error **errp) { - ChardevStdio *opts = backend->stdio; + ChardevStdio *opts = backend->u.stdio; CharDriverState *chr; struct sigaction act; @@ -1241,11 +1241,16 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr) { PtyCharDriver *s = chr->opaque; GPollFD pfd; + int rc; pfd.fd = g_io_channel_unix_get_fd(s->fd); pfd.events = G_IO_OUT; pfd.revents = 0; - g_poll(&pfd, 1, 0); + do { + rc = g_poll(&pfd, 1, 0); + } while (rc == -1 && errno == EINTR); + assert(rc >= 0); + if (pfd.revents & G_IO_HUP) { pty_chr_state(chr, 0); } else { @@ -2147,7 +2152,7 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, ChardevReturn *ret, Error **errp) { - ChardevHostdev *opts = backend->pipe; + ChardevHostdev *opts = backend->u.pipe; const char *filename = opts->device; CharDriverState *chr; WinCharState *s; @@ -3202,7 +3207,7 @@ static CharDriverState *qemu_chr_open_ringbuf(const char *id, ChardevReturn *ret, Error **errp) { - ChardevRingbuf *opts = backend->ringbuf; + ChardevRingbuf *opts = backend->u.ringbuf; CharDriverState *chr; RingBufCharDriver *d; @@ -3477,16 +3482,16 @@ static void qemu_chr_parse_file_out(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: file: no filename given"); return; } - backend->file = g_new0(ChardevFile, 1); - backend->file->out = g_strdup(path); + backend->u.file = g_new0(ChardevFile, 1); + backend->u.file->out = g_strdup(path); } static void qemu_chr_parse_stdio(QemuOpts *opts, ChardevBackend *backend, Error **errp) { - backend->stdio = g_new0(ChardevStdio, 1); - backend->stdio->has_signal = true; - backend->stdio->signal = qemu_opt_get_bool(opts, "signal", true); + backend->u.stdio = g_new0(ChardevStdio, 1); + backend->u.stdio->has_signal = true; + backend->u.stdio->signal = qemu_opt_get_bool(opts, "signal", true); } #ifdef HAVE_CHARDEV_SERIAL @@ -3499,8 +3504,8 @@ static void qemu_chr_parse_serial(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: serial/tty: no device path given"); return; } - backend->serial = g_new0(ChardevHostdev, 1); - backend->serial->device = g_strdup(device); + backend->u.serial = g_new0(ChardevHostdev, 1); + backend->u.serial->device = g_strdup(device); } #endif @@ -3514,8 +3519,8 @@ static void qemu_chr_parse_parallel(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: parallel: no device path given"); return; } - backend->parallel = g_new0(ChardevHostdev, 1); - backend->parallel->device = g_strdup(device); + backend->u.parallel = g_new0(ChardevHostdev, 1); + backend->u.parallel->device = g_strdup(device); } #endif @@ -3528,8 +3533,8 @@ static void qemu_chr_parse_pipe(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: pipe: no device path given"); return; } - backend->pipe = g_new0(ChardevHostdev, 1); - backend->pipe->device = g_strdup(device); + backend->u.pipe = g_new0(ChardevHostdev, 1); + backend->u.pipe->device = g_strdup(device); } static void qemu_chr_parse_ringbuf(QemuOpts *opts, ChardevBackend *backend, @@ -3537,12 +3542,12 @@ static void qemu_chr_parse_ringbuf(QemuOpts *opts, ChardevBackend *backend, { int val; - backend->ringbuf = g_new0(ChardevRingbuf, 1); + backend->u.ringbuf = g_new0(ChardevRingbuf, 1); val = qemu_opt_get_size(opts, "size", 0); if (val != 0) { - backend->ringbuf->has_size = true; - backend->ringbuf->size = val; + backend->u.ringbuf->has_size = true; + backend->u.ringbuf->size = val; } } @@ -3555,8 +3560,8 @@ static void qemu_chr_parse_mux(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: mux: no chardev given"); return; } - backend->mux = g_new0(ChardevMux, 1); - backend->mux->chardev = g_strdup(chardev); + backend->u.mux = g_new0(ChardevMux, 1); + backend->u.mux->chardev = g_strdup(chardev); } static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, @@ -3583,37 +3588,37 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, } } - backend->socket = g_new0(ChardevSocket, 1); + backend->u.socket = g_new0(ChardevSocket, 1); - backend->socket->has_nodelay = true; - backend->socket->nodelay = do_nodelay; - backend->socket->has_server = true; - backend->socket->server = is_listen; - backend->socket->has_telnet = true; - backend->socket->telnet = is_telnet; - backend->socket->has_wait = true; - backend->socket->wait = is_waitconnect; - backend->socket->has_reconnect = true; - backend->socket->reconnect = reconnect; + backend->u.socket->has_nodelay = true; + backend->u.socket->nodelay = do_nodelay; + backend->u.socket->has_server = true; + backend->u.socket->server = is_listen; + backend->u.socket->has_telnet = true; + backend->u.socket->telnet = is_telnet; + backend->u.socket->has_wait = true; + backend->u.socket->wait = is_waitconnect; + backend->u.socket->has_reconnect = true; + backend->u.socket->reconnect = reconnect; addr = g_new0(SocketAddress, 1); if (path) { - addr->kind = SOCKET_ADDRESS_KIND_UNIX; - addr->q_unix = g_new0(UnixSocketAddress, 1); - addr->q_unix->path = g_strdup(path); + addr->type = SOCKET_ADDRESS_KIND_UNIX; + addr->u.q_unix = g_new0(UnixSocketAddress, 1); + addr->u.q_unix->path = g_strdup(path); } else { - addr->kind = SOCKET_ADDRESS_KIND_INET; - addr->inet = g_new0(InetSocketAddress, 1); - addr->inet->host = g_strdup(host); - addr->inet->port = g_strdup(port); - addr->inet->has_to = qemu_opt_get(opts, "to"); - addr->inet->to = qemu_opt_get_number(opts, "to", 0); - addr->inet->has_ipv4 = qemu_opt_get(opts, "ipv4"); - addr->inet->ipv4 = qemu_opt_get_bool(opts, "ipv4", 0); - addr->inet->has_ipv6 = qemu_opt_get(opts, "ipv6"); - addr->inet->ipv6 = qemu_opt_get_bool(opts, "ipv6", 0); + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet = g_new0(InetSocketAddress, 1); + addr->u.inet->host = g_strdup(host); + addr->u.inet->port = g_strdup(port); + addr->u.inet->has_to = qemu_opt_get(opts, "to"); + addr->u.inet->to = qemu_opt_get_number(opts, "to", 0); + addr->u.inet->has_ipv4 = qemu_opt_get(opts, "ipv4"); + addr->u.inet->ipv4 = qemu_opt_get_bool(opts, "ipv4", 0); + addr->u.inet->has_ipv6 = qemu_opt_get(opts, "ipv6"); + addr->u.inet->ipv6 = qemu_opt_get_bool(opts, "ipv6", 0); } - backend->socket->addr = addr; + backend->u.socket->addr = addr; } static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend, @@ -3644,27 +3649,27 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend, has_local = true; } - backend->udp = g_new0(ChardevUdp, 1); + backend->u.udp = g_new0(ChardevUdp, 1); addr = g_new0(SocketAddress, 1); - addr->kind = SOCKET_ADDRESS_KIND_INET; - addr->inet = g_new0(InetSocketAddress, 1); - addr->inet->host = g_strdup(host); - addr->inet->port = g_strdup(port); - addr->inet->has_ipv4 = qemu_opt_get(opts, "ipv4"); - addr->inet->ipv4 = qemu_opt_get_bool(opts, "ipv4", 0); - addr->inet->has_ipv6 = qemu_opt_get(opts, "ipv6"); - addr->inet->ipv6 = qemu_opt_get_bool(opts, "ipv6", 0); - backend->udp->remote = addr; + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet = g_new0(InetSocketAddress, 1); + addr->u.inet->host = g_strdup(host); + addr->u.inet->port = g_strdup(port); + addr->u.inet->has_ipv4 = qemu_opt_get(opts, "ipv4"); + addr->u.inet->ipv4 = qemu_opt_get_bool(opts, "ipv4", 0); + addr->u.inet->has_ipv6 = qemu_opt_get(opts, "ipv6"); + addr->u.inet->ipv6 = qemu_opt_get_bool(opts, "ipv6", 0); + backend->u.udp->remote = addr; if (has_local) { - backend->udp->has_local = true; + backend->u.udp->has_local = true; addr = g_new0(SocketAddress, 1); - addr->kind = SOCKET_ADDRESS_KIND_INET; - addr->inet = g_new0(InetSocketAddress, 1); - addr->inet->host = g_strdup(localaddr); - addr->inet->port = g_strdup(localport); - backend->udp->local = addr; + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet = g_new0(InetSocketAddress, 1); + addr->u.inet->host = g_strdup(localaddr); + addr->u.inet->port = g_strdup(localport); + backend->u.udp->local = addr; } } @@ -3737,7 +3742,7 @@ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, } chr = NULL; - backend->kind = cd->kind; + backend->type = cd->kind; if (cd->parse) { cd->parse(opts, backend, &local_err); if (local_err) { @@ -3754,9 +3759,9 @@ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, qapi_free_ChardevBackend(backend); qapi_free_ChardevReturn(ret); backend = g_new0(ChardevBackend, 1); - backend->mux = g_new0(ChardevMux, 1); - backend->kind = CHARDEV_BACKEND_KIND_MUX; - backend->mux->chardev = g_strdup(bid); + backend->u.mux = g_new0(ChardevMux, 1); + backend->type = CHARDEV_BACKEND_KIND_MUX; + backend->u.mux->chardev = g_strdup(bid); ret = qmp_chardev_add(id, backend, errp); if (!ret) { chr = qemu_chr_find(bid); @@ -4048,7 +4053,7 @@ static CharDriverState *qmp_chardev_open_file(const char *id, ChardevReturn *ret, Error **errp) { - ChardevFile *file = backend->file; + ChardevFile *file = backend->u.file; HANDLE out; if (file->has_in) { @@ -4070,7 +4075,7 @@ static CharDriverState *qmp_chardev_open_serial(const char *id, ChardevReturn *ret, Error **errp) { - ChardevHostdev *serial = backend->serial; + ChardevHostdev *serial = backend->u.serial; return qemu_chr_open_win_path(serial->device, errp); } @@ -4093,7 +4098,7 @@ static CharDriverState *qmp_chardev_open_file(const char *id, ChardevReturn *ret, Error **errp) { - ChardevFile *file = backend->file; + ChardevFile *file = backend->u.file; int flags, in = -1, out; flags = O_WRONLY | O_TRUNC | O_CREAT | O_BINARY; @@ -4120,7 +4125,7 @@ static CharDriverState *qmp_chardev_open_serial(const char *id, ChardevReturn *ret, Error **errp) { - ChardevHostdev *serial = backend->serial; + ChardevHostdev *serial = backend->u.serial; int fd; fd = qmp_chardev_open_file_source(serial->device, O_RDWR, errp); @@ -4138,7 +4143,7 @@ static CharDriverState *qmp_chardev_open_parallel(const char *id, ChardevReturn *ret, Error **errp) { - ChardevHostdev *parallel = backend->parallel; + ChardevHostdev *parallel = backend->u.parallel; int fd; fd = qmp_chardev_open_file_source(parallel->device, O_RDWR, errp); @@ -4183,7 +4188,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, { CharDriverState *chr; TCPCharDriver *s; - ChardevSocket *sock = backend->socket; + ChardevSocket *sock = backend->u.socket; SocketAddress *addr = sock->addr; bool do_nodelay = sock->has_nodelay ? sock->nodelay : false; bool is_listen = sock->has_server ? sock->server : true; @@ -4196,7 +4201,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, s->fd = -1; s->listen_fd = -1; - s->is_unix = addr->kind == SOCKET_ADDRESS_KIND_UNIX; + s->is_unix = addr->type == SOCKET_ADDRESS_KIND_UNIX; s->is_listen = is_listen; s->is_telnet = is_telnet; s->do_nodelay = do_nodelay; @@ -4250,7 +4255,7 @@ static CharDriverState *qmp_chardev_open_udp(const char *id, ChardevReturn *ret, Error **errp) { - ChardevUdp *udp = backend->udp; + ChardevUdp *udp = backend->u.udp; int fd; fd = socket_dgram(udp->remote, udp->local, errp); @@ -4279,7 +4284,7 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, for (i = backends; i; i = i->next) { cd = i->data; - if (cd->kind == backend->kind) { + if (cd->kind == backend->type) { chr = cd->create(id, backend, ret, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -4297,9 +4302,9 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, chr->label = g_strdup(id); chr->avail_connections = - (backend->kind == CHARDEV_BACKEND_KIND_MUX) ? MAX_MUX : 1; + (backend->type == CHARDEV_BACKEND_KIND_MUX) ? MAX_MUX : 1; if (!chr->filename) { - chr->filename = g_strdup(ChardevBackendKind_lookup[backend->kind]); + chr->filename = g_strdup(ChardevBackendKind_lookup[backend->type]); } if (!chr->explicit_be_open) { qemu_chr_be_event(chr, CHR_EVENT_OPENED); diff --git a/qemu-doc.texi b/qemu-doc.texi index 3126abdcd3..ffc3e50abb 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1256,7 +1256,7 @@ zero-copy communication to the application level of the guests. The basic syntax is: @example -qemu-system-i386 -device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>] +qemu-system-i386 -device ivshmem,size=@var{size},shm=@var{shm-name} @end example If desired, interrupts can be sent between guest VMs accessing the same shared @@ -1267,12 +1267,12 @@ memory server is: @example # First start the ivshmem server once and for all -ivshmem-server -p <pidfile> -S <path> -m <shm name> -l <shm size> -n <vectors n> +ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} # Then start your qemu instances with matching arguments -qemu-system-i386 -device ivshmem,size=<shm size>,vectors=<vectors n>,chardev=<id> +qemu-system-i386 -device ivshmem,size=@var{shm-size},vectors=@var{vectors},chardev=@var{id} [,msi=on][,ioeventfd=on][,role=peer|master] - -chardev socket,path=<path>,id=<id> + -chardev socket,path=@var{path},id=@var{id} @end example When using the server, the guest will be assigned a VM ID (>=0) that allows guests @@ -1299,8 +1299,8 @@ Instead of specifying the <shm size> using POSIX shm, you may specify a memory backend that has hugepage support: @example -qemu-system-i386 -object memory-backend-file,size=1G,mem-path=/mnt/hugepages,id=mb1 - -device ivshmem,memdev=mb1 +qemu-system-i386 -object memory-backend-file,size=1G,mem-path=/mnt/hugepages/my-shmem-file,id=mb1 + -device ivshmem,x-memdev=mb1 @end example ivshmem-server also supports hugepages mount points with the diff --git a/qemu-img.c b/qemu-img.c index 3025776e14..033011c4e7 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -645,9 +645,6 @@ static void common_block_job_cb(void *opaque, int ret) if (ret < 0) { error_setg_errno(cbi->errp, -ret, "Block job failed"); } - - /* Drop this block job's reference */ - bdrv_unref(cbi->bs); } static void run_block_job(BlockJob *job, Error **errp) @@ -656,7 +653,8 @@ static void run_block_job(BlockJob *job, Error **errp) do { aio_poll(aio_context, true); - qemu_progress_print((float)job->offset / job->len * 100.f, 0); + qemu_progress_print(job->len ? + ((float)job->offset / job->len * 100.f) : 0.0f, 0); } while (!job->ready); block_job_complete_sync(job, errp); diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 6e5d1e4d38..18fc2bdc10 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -136,7 +136,29 @@ static char **breakline(char *input, int *count) static int64_t cvtnum(const char *s) { char *end; - return qemu_strtosz_suffix(s, &end, QEMU_STRTOSZ_DEFSUFFIX_B); + int64_t ret; + + ret = qemu_strtosz_suffix(s, &end, QEMU_STRTOSZ_DEFSUFFIX_B); + if (*end != '\0') { + /* Detritus at the end of the string */ + return -EINVAL; + } + return ret; +} + +static void print_cvtnum_err(int64_t rc, const char *arg) +{ + switch (rc) { + case -EINVAL: + printf("Parsing error: non-numeric argument," + " or extraneous/unrecognized suffix -- %s\n", arg); + break; + case -ERANGE: + printf("Parsing error: argument too large -- %s\n", arg); + break; + default: + printf("Parsing error: %s\n", arg); + } } #define EXABYTES(x) ((long long)(x) << 60) @@ -294,9 +316,10 @@ static void qemu_io_free(void *p) qemu_vfree(p); } -static void dump_buffer(const void *buffer, int64_t offset, int len) +static void dump_buffer(const void *buffer, int64_t offset, int64_t len) { - int i, j; + uint64_t i; + int j; const uint8_t *p; for (i = 0, p = buffer; i < len; i += 16) { @@ -319,7 +342,7 @@ static void dump_buffer(const void *buffer, int64_t offset, int len) } static void print_report(const char *op, struct timeval *t, int64_t offset, - int count, int total, int cnt, int Cflag) + int64_t count, int64_t total, int cnt, int Cflag) { char s1[64], s2[64], ts[64]; @@ -327,12 +350,12 @@ static void print_report(const char *op, struct timeval *t, int64_t offset, if (!Cflag) { cvtstr((double)total, s1, sizeof(s1)); cvtstr(tdiv((double)total, *t), s2, sizeof(s2)); - printf("%s %d/%d bytes at offset %" PRId64 "\n", + printf("%s %"PRId64"/%"PRId64" bytes at offset %" PRId64 "\n", op, total, count, offset); printf("%s, %d ops; %s (%s/sec and %.4f ops/sec)\n", s1, cnt, ts, s2, tdiv((double)cnt, *t)); } else {/* bytes,ops,time,bytes/sec,ops/sec */ - printf("%d,%d,%s,%.3f,%.3f\n", + printf("%"PRId64",%d,%s,%.3f,%.3f\n", total, cnt, ts, tdiv((double)total, *t), tdiv((double)cnt, *t)); @@ -359,13 +382,13 @@ create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov, len = cvtnum(arg); if (len < 0) { - printf("non-numeric length argument -- %s\n", arg); + print_cvtnum_err(len, arg); goto fail; } /* should be SIZE_T_MAX, but that doesn't exist */ if (len > INT_MAX) { - printf("too large length argument -- %s\n", arg); + printf("Argument '%s' exceeds maximum size %d\n", arg, INT_MAX); goto fail; } @@ -393,11 +416,15 @@ fail: return buf; } -static int do_read(BlockBackend *blk, char *buf, int64_t offset, int count, - int *total) +static int do_read(BlockBackend *blk, char *buf, int64_t offset, int64_t count, + int64_t *total) { int ret; + if (count >> 9 > INT_MAX) { + return -ERANGE; + } + ret = blk_read(blk, offset >> 9, (uint8_t *)buf, count >> 9); if (ret < 0) { return ret; @@ -406,11 +433,15 @@ static int do_read(BlockBackend *blk, char *buf, int64_t offset, int count, return 1; } -static int do_write(BlockBackend *blk, char *buf, int64_t offset, int count, - int *total) +static int do_write(BlockBackend *blk, char *buf, int64_t offset, int64_t count, + int64_t *total) { int ret; + if (count >> 9 > INT_MAX) { + return -ERANGE; + } + ret = blk_write(blk, offset >> 9, (uint8_t *)buf, count >> 9); if (ret < 0) { return ret; @@ -419,9 +450,13 @@ static int do_write(BlockBackend *blk, char *buf, int64_t offset, int count, return 1; } -static int do_pread(BlockBackend *blk, char *buf, int64_t offset, int count, - int *total) +static int do_pread(BlockBackend *blk, char *buf, int64_t offset, + int64_t count, int64_t *total) { + if (count > INT_MAX) { + return -ERANGE; + } + *total = blk_pread(blk, offset, (uint8_t *)buf, count); if (*total < 0) { return *total; @@ -429,9 +464,13 @@ static int do_pread(BlockBackend *blk, char *buf, int64_t offset, int count, return 1; } -static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset, int count, - int *total) +static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset, + int64_t count, int64_t *total) { + if (count > INT_MAX) { + return -ERANGE; + } + *total = blk_pwrite(blk, offset, (uint8_t *)buf, count); if (*total < 0) { return *total; @@ -442,8 +481,8 @@ static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset, int count, typedef struct { BlockBackend *blk; int64_t offset; - int count; - int *total; + int64_t count; + int64_t *total; int ret; bool done; } CoWriteZeroes; @@ -463,8 +502,8 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque) *data->total = data->count; } -static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int count, - int *total) +static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count, + int64_t *total) { Coroutine *co; CoWriteZeroes data = { @@ -475,6 +514,10 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int count, .done = false, }; + if (count >> BDRV_SECTOR_BITS > INT_MAX) { + return -ERANGE; + } + co = qemu_coroutine_create(co_write_zeroes_entry); qemu_coroutine_enter(co, &data); while (!data.done) { @@ -488,10 +531,14 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int count, } static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset, - int count, int *total) + int64_t count, int64_t *total) { int ret; + if (count >> 9 > INT_MAX) { + return -ERANGE; + } + ret = blk_write_compressed(blk, offset >> 9, (uint8_t *)buf, count >> 9); if (ret < 0) { return ret; @@ -501,8 +548,12 @@ static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset, } static int do_load_vmstate(BlockBackend *blk, char *buf, int64_t offset, - int count, int *total) + int64_t count, int64_t *total) { + if (count > INT_MAX) { + return -ERANGE; + } + *total = blk_load_vmstate(blk, (uint8_t *)buf, offset, count); if (*total < 0) { return *total; @@ -511,8 +562,12 @@ static int do_load_vmstate(BlockBackend *blk, char *buf, int64_t offset, } static int do_save_vmstate(BlockBackend *blk, char *buf, int64_t offset, - int count, int *total) + int64_t count, int64_t *total) { + if (count > INT_MAX) { + return -ERANGE; + } + *total = blk_save_vmstate(blk, (uint8_t *)buf, offset, count); if (*total < 0) { return *total; @@ -642,10 +697,11 @@ static int read_f(BlockBackend *blk, int argc, char **argv) int c, cnt; char *buf; int64_t offset; - int count; + int64_t count; /* Some compilers get confused and warn if this is not initialized. */ - int total = 0; - int pattern = 0, pattern_offset = 0, pattern_count = 0; + int64_t total = 0; + int pattern = 0; + int64_t pattern_offset = 0, pattern_count = 0; while ((c = getopt(argc, argv, "bCl:pP:qs:v")) != -1) { switch (c) { @@ -659,7 +715,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv) lflag = 1; pattern_count = cvtnum(optarg); if (pattern_count < 0) { - printf("non-numeric length argument -- %s\n", optarg); + print_cvtnum_err(pattern_count, optarg); return 0; } break; @@ -680,7 +736,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv) sflag = 1; pattern_offset = cvtnum(optarg); if (pattern_offset < 0) { - printf("non-numeric length argument -- %s\n", optarg); + print_cvtnum_err(pattern_offset, optarg); return 0; } break; @@ -703,14 +759,18 @@ static int read_f(BlockBackend *blk, int argc, char **argv) offset = cvtnum(argv[optind]); if (offset < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(offset, argv[optind]); return 0; } optind++; count = cvtnum(argv[optind]); if (count < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(count, argv[optind]); + return 0; + } else if (count > SIZE_MAX) { + printf("length cannot exceed %" PRIu64 ", given %s\n", + (uint64_t) SIZE_MAX, argv[optind]); return 0; } @@ -734,7 +794,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv) return 0; } if (count & 0x1ff) { - printf("count %d is not sector aligned\n", + printf("count %"PRId64" is not sector aligned\n", count); return 0; } @@ -762,7 +822,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv) memset(cmp_buf, pattern, pattern_count); if (memcmp(buf + pattern_offset, cmp_buf, pattern_count)) { printf("Pattern verification failed at offset %" - PRId64 ", %d bytes\n", + PRId64 ", %"PRId64" bytes\n", offset + pattern_offset, pattern_count); } g_free(cmp_buf); @@ -861,7 +921,7 @@ static int readv_f(BlockBackend *blk, int argc, char **argv) offset = cvtnum(argv[optind]); if (offset < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(offset, argv[optind]); return 0; } optind++; @@ -957,9 +1017,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv) int c, cnt; char *buf = NULL; int64_t offset; - int count; + int64_t count; /* Some compilers get confused and warn if this is not initialized. */ - int total = 0; + int64_t total = 0; int pattern = 0xcd; while ((c = getopt(argc, argv, "bcCpP:qz")) != -1) { @@ -1010,14 +1070,18 @@ static int write_f(BlockBackend *blk, int argc, char **argv) offset = cvtnum(argv[optind]); if (offset < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(offset, argv[optind]); return 0; } optind++; count = cvtnum(argv[optind]); if (count < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(count, argv[optind]); + return 0; + } else if (count > SIZE_MAX) { + printf("length cannot exceed %" PRIu64 ", given %s\n", + (uint64_t) SIZE_MAX, argv[optind]); return 0; } @@ -1029,7 +1093,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv) } if (count & 0x1ff) { - printf("count %d is not sector aligned\n", + printf("count %"PRId64" is not sector aligned\n", count); return 0; } @@ -1142,7 +1206,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv) offset = cvtnum(argv[optind]); if (offset < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(offset, argv[optind]); return 0; } optind++; @@ -1269,7 +1333,7 @@ static int multiwrite_f(BlockBackend *blk, int argc, char **argv) /* Read the offset of the request */ offset = cvtnum(argv[optind]); if (offset < 0) { - printf("non-numeric offset argument -- %s\n", argv[optind]); + print_cvtnum_err(offset, argv[optind]); goto out; } optind++; @@ -1364,6 +1428,7 @@ static void aio_write_done(void *opaque, int ret) if (ret < 0) { printf("aio_write failed: %s\n", strerror(-ret)); + block_acct_failed(blk_get_stats(ctx->blk), &ctx->acct); goto out; } @@ -1392,6 +1457,7 @@ static void aio_read_done(void *opaque, int ret) if (ret < 0) { printf("readv failed: %s\n", strerror(-ret)); + block_acct_failed(blk_get_stats(ctx->blk), &ctx->acct); goto out; } @@ -1496,7 +1562,7 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv) ctx->offset = cvtnum(argv[optind]); if (ctx->offset < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(ctx->offset, argv[optind]); g_free(ctx); return 0; } @@ -1505,6 +1571,7 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv) if (ctx->offset & 0x1ff) { printf("offset %" PRId64 " is not sector aligned\n", ctx->offset); + block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ); g_free(ctx); return 0; } @@ -1512,6 +1579,7 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv) nr_iov = argc - optind; ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab); if (ctx->buf == NULL) { + block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ); g_free(ctx); return 0; } @@ -1591,7 +1659,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv) ctx->offset = cvtnum(argv[optind]); if (ctx->offset < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(ctx->offset, argv[optind]); g_free(ctx); return 0; } @@ -1600,6 +1668,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv) if (ctx->offset & 0x1ff) { printf("offset %" PRId64 " is not sector aligned\n", ctx->offset); + block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE); g_free(ctx); return 0; } @@ -1607,6 +1676,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv) nr_iov = argc - optind; ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, pattern); if (ctx->buf == NULL) { + block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE); g_free(ctx); return 0; } @@ -1621,7 +1691,10 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv) static int aio_flush_f(BlockBackend *blk, int argc, char **argv) { + BlockAcctCookie cookie; + block_acct_start(blk_get_stats(blk), &cookie, 0, BLOCK_ACCT_FLUSH); blk_drain_all(); + block_acct_done(blk_get_stats(blk), &cookie); return 0; } @@ -1651,7 +1724,7 @@ static int truncate_f(BlockBackend *blk, int argc, char **argv) offset = cvtnum(argv[1]); if (offset < 0) { - printf("non-numeric truncate argument -- %s\n", argv[1]); + print_cvtnum_err(offset, argv[1]); return 0; } @@ -1777,8 +1850,7 @@ static int discard_f(BlockBackend *blk, int argc, char **argv) struct timeval t1, t2; int Cflag = 0, qflag = 0; int c, ret; - int64_t offset; - int count; + int64_t offset, count; while ((c = getopt(argc, argv, "Cq")) != -1) { switch (c) { @@ -1799,14 +1871,19 @@ static int discard_f(BlockBackend *blk, int argc, char **argv) offset = cvtnum(argv[optind]); if (offset < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(offset, argv[optind]); return 0; } optind++; count = cvtnum(argv[optind]); if (count < 0) { - printf("non-numeric length argument -- %s\n", argv[optind]); + print_cvtnum_err(count, argv[optind]); + return 0; + } else if (count >> BDRV_SECTOR_BITS > INT_MAX) { + printf("length cannot exceed %"PRIu64", given %s\n", + (uint64_t)INT_MAX << BDRV_SECTOR_BITS, + argv[optind]); return 0; } @@ -1833,15 +1910,14 @@ out: static int alloc_f(BlockBackend *blk, int argc, char **argv) { BlockDriverState *bs = blk_bs(blk); - int64_t offset, sector_num; - int nb_sectors, remaining; + int64_t offset, sector_num, nb_sectors, remaining; char s1[64]; - int num, sum_alloc; - int ret; + int num, ret; + int64_t sum_alloc; offset = cvtnum(argv[1]); if (offset < 0) { - printf("non-numeric offset argument -- %s\n", argv[1]); + print_cvtnum_err(offset, argv[1]); return 0; } else if (offset & 0x1ff) { printf("offset %" PRId64 " is not sector aligned\n", @@ -1852,7 +1928,11 @@ static int alloc_f(BlockBackend *blk, int argc, char **argv) if (argc == 3) { nb_sectors = cvtnum(argv[2]); if (nb_sectors < 0) { - printf("non-numeric length argument -- %s\n", argv[2]); + print_cvtnum_err(nb_sectors, argv[2]); + return 0; + } else if (nb_sectors > INT_MAX) { + printf("length argument cannot exceed %d, given %s\n", + INT_MAX, argv[2]); return 0; } } else { @@ -1881,7 +1961,7 @@ static int alloc_f(BlockBackend *blk, int argc, char **argv) cvtstr(offset, s1, sizeof(s1)); - printf("%d/%d sectors allocated at offset %s\n", + printf("%"PRId64"/%"PRId64" sectors allocated at offset %s\n", sum_alloc, nb_sectors, s1); return 0; } @@ -2191,9 +2271,13 @@ static const cmdinfo_t sigraise_cmd = { static int sigraise_f(BlockBackend *blk, int argc, char **argv) { - int sig = cvtnum(argv[1]); + int64_t sig = cvtnum(argv[1]); if (sig < 0) { - printf("non-numeric signal number argument -- %s\n", argv[1]); + print_cvtnum_err(sig, argv[1]); + return 0; + } else if (sig > NSIG) { + printf("signal argument '%s' is too large to be a valid signal\n", + argv[1]); return 0; } diff --git a/qemu-log.c b/qemu-log.c index efd07c81ea..7cb01a802b 100644 --- a/qemu-log.c +++ b/qemu-log.c @@ -112,8 +112,6 @@ const QEMULogItem qemu_log_items[] = { "x86 only: show protected mode far calls/returns/exceptions" }, { CPU_LOG_RESET, "cpu_reset", "show CPU state before CPU resets" }, - { CPU_LOG_IOPORT, "ioport", - "show all i/o ports accesses" }, { LOG_UNIMP, "unimp", "log unimplemented functionality" }, { LOG_GUEST_ERROR, "guest_errors", diff --git a/qemu-nbd.c b/qemu-nbd.c index 422a607bdd..3afec76504 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -362,17 +362,17 @@ static SocketAddress *nbd_build_socket_address(const char *sockpath, saddr = g_new0(SocketAddress, 1); if (sockpath) { - saddr->kind = SOCKET_ADDRESS_KIND_UNIX; - saddr->q_unix = g_new0(UnixSocketAddress, 1); - saddr->q_unix->path = g_strdup(sockpath); + saddr->type = SOCKET_ADDRESS_KIND_UNIX; + saddr->u.q_unix = g_new0(UnixSocketAddress, 1); + saddr->u.q_unix->path = g_strdup(sockpath); } else { - saddr->kind = SOCKET_ADDRESS_KIND_INET; - saddr->inet = g_new0(InetSocketAddress, 1); - saddr->inet->host = g_strdup(bindto); + saddr->type = SOCKET_ADDRESS_KIND_INET; + saddr->u.inet = g_new0(InetSocketAddress, 1); + saddr->u.inet->host = g_strdup(bindto); if (port) { - saddr->inet->port = g_strdup(port); + saddr->u.inet->port = g_strdup(port); } else { - saddr->inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT); + saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT); } } diff --git a/qemu-options.hx b/qemu-options.hx index 949db7f2ea..0eea4ee9e9 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3157,12 +3157,12 @@ re-inject them. ETEXI DEF("icount", HAS_ARG, QEMU_OPTION_icount, \ - "-icount [shift=N|auto][,align=on|off][,sleep=no]\n" \ + "-icount [shift=N|auto][,align=on|off][,sleep=no,rr=record|replay,rrfile=<filename>]\n" \ " enable virtual instruction counter with 2^N clock ticks per\n" \ " instruction, enable aligning the host and virtual clocks\n" \ " or disable real time cpu sleeping\n", QEMU_ARCH_ALL) STEXI -@item -icount [shift=@var{N}|auto] +@item -icount [shift=@var{N}|auto][,rr=record|replay,rrfile=@var{filename}] @findex -icount Enable virtual instruction counter. The virtual cpu will execute one instruction every 2^@var{N} ns of virtual time. If @code{auto} is specified @@ -3191,6 +3191,10 @@ Currently this option does not work when @option{shift} is @code{auto}. Note: The sync algorithm will work for those shift values for which the guest clock runs ahead of the host clock. Typically this happens when the shift value is high (how high depends on the host machine). + +When @option{rr} option is specified deterministic record/replay is enabled. +Replay log is written into @var{filename} file in record mode and +read from this file in replay mode. ETEXI DEF("watchdog", HAS_ARG, QEMU_OPTION_watchdog, \ diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 80d034a8d5..c831fe83ad 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -16,6 +16,14 @@ #include <seccomp.h> #include "sysemu/seccomp.h" +#if SCMP_VER_MAJOR >= 3 + #define HAVE_CACHEFLUSH +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 3 + #define HAVE_CACHEFLUSH +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 2 && SCMP_VER_MICRO >= 3 + #define HAVE_CACHEFLUSH +#endif + struct QemuSeccompSyscall { int32_t num; uint8_t priority; @@ -238,7 +246,10 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { { SCMP_SYS(inotify_init1), 240 }, { SCMP_SYS(inotify_add_watch), 240 }, { SCMP_SYS(mbind), 240 }, - { SCMP_SYS(memfd_create), 240 } + { SCMP_SYS(memfd_create), 240 }, +#ifdef HAVE_CACHEFLUSH + { SCMP_SYS(cacheflush), 240 }, +#endif }; int seccomp_start(void) diff --git a/qemu-timer.c b/qemu-timer.c index 2463fe6f6a..f16e422837 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -24,6 +24,8 @@ #include "qemu/main-loop.h" #include "qemu/timer.h" +#include "sysemu/replay.h" +#include "sysemu/sysemu.h" #ifdef CONFIG_POSIX #include <pthread.h> @@ -477,10 +479,31 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) void *opaque; qemu_event_reset(&timer_list->timers_done_ev); - if (!timer_list->clock->enabled) { + if (!timer_list->clock->enabled || !timer_list->active_timers) { goto out; } + switch (timer_list->clock->type) { + case QEMU_CLOCK_REALTIME: + break; + default: + case QEMU_CLOCK_VIRTUAL: + if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) { + goto out; + } + break; + case QEMU_CLOCK_HOST: + if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) { + goto out; + } + break; + case QEMU_CLOCK_VIRTUAL_RT: + if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) { + goto out; + } + break; + } + current_time = qemu_clock_get_ns(timer_list->clock->type); for(;;) { qemu_mutex_lock(&timer_list->active_timers_lock); @@ -544,11 +567,17 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg) { int64_t deadline = -1; QEMUClockType type; + bool play = replay_mode == REPLAY_MODE_PLAY; for (type = 0; type < QEMU_CLOCK_MAX; type++) { - if (qemu_clock_use_for_deadline(tlg->tl[type]->clock->type)) { - deadline = qemu_soonest_timeout(deadline, - timerlist_deadline_ns( - tlg->tl[type])); + if (qemu_clock_use_for_deadline(type)) { + if (!play || type == QEMU_CLOCK_REALTIME) { + deadline = qemu_soonest_timeout(deadline, + timerlist_deadline_ns(tlg->tl[type])); + } else { + /* Read clock from the replay file and + do not calculate the deadline, based on virtual clock. */ + qemu_clock_get_ns(type); + } } } return deadline; @@ -570,7 +599,7 @@ int64_t qemu_clock_get_ns(QEMUClockType type) return cpu_get_clock(); } case QEMU_CLOCK_HOST: - now = get_clock_realtime(); + now = REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime()); last = clock->last; clock->last = now; if (now < last || now > (last + get_max_clock_jump())) { @@ -578,7 +607,7 @@ int64_t qemu_clock_get_ns(QEMUClockType type) } return now; case QEMU_CLOCK_VIRTUAL_RT: - return cpu_get_clock(); + return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock()); } } diff --git a/qga/commands-posix.c b/qga/commands-posix.c index 67a173af4f..c2ff97021f 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -28,6 +28,7 @@ #include "qapi/qmp/qerror.h" #include "qemu/queue.h" #include "qemu/host-utils.h" +#include "qemu/sockets.h" #ifndef CONFIG_HAS_ENVIRON #ifdef __APPLE__ @@ -215,9 +216,16 @@ void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp) } } +typedef enum { + RW_STATE_NEW, + RW_STATE_READING, + RW_STATE_WRITING, +} RwState; + typedef struct GuestFileHandle { uint64_t id; FILE *fh; + RwState state; QTAILQ_ENTRY(GuestFileHandle) next; } GuestFileHandle; @@ -385,27 +393,6 @@ safe_open_or_create(const char *path, const char *mode, Error **errp) return NULL; } -static int guest_file_toggle_flags(int fd, int flags, bool set, Error **err) -{ - int ret, old_flags; - - old_flags = fcntl(fd, F_GETFL); - if (old_flags == -1) { - error_setg_errno(err, errno, QERR_QGA_COMMAND_FAILED, - "failed to fetch filehandle flags"); - return -1; - } - - ret = fcntl(fd, F_SETFL, set ? (old_flags | flags) : (old_flags & ~flags)); - if (ret == -1) { - error_setg_errno(err, errno, QERR_QGA_COMMAND_FAILED, - "failed to set filehandle flags"); - return -1; - } - - return ret; -} - int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode, Error **errp) { @@ -426,10 +413,7 @@ int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode, /* set fd non-blocking to avoid common use cases (like reading from a * named pipe) from hanging the agent */ - if (guest_file_toggle_flags(fileno(fh), O_NONBLOCK, true, errp) < 0) { - fclose(fh); - return -1; - } + qemu_set_nonblock(fileno(fh)); handle = guest_file_handle_add(fh, errp); if (handle < 0) { @@ -483,6 +467,17 @@ struct GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count, } fh = gfh->fh; + + /* explicitly flush when switching from writing to reading */ + if (gfh->state == RW_STATE_WRITING) { + int ret = fflush(fh); + if (ret == EOF) { + error_setg_errno(errp, errno, "failed to flush file"); + return NULL; + } + gfh->state = RW_STATE_NEW; + } + buf = g_malloc0(count+1); read_count = fread(buf, 1, count, fh); if (ferror(fh)) { @@ -496,6 +491,7 @@ struct GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count, if (read_count) { read_data->buf_b64 = g_base64_encode(buf, read_count); } + gfh->state = RW_STATE_READING; } g_free(buf); clearerr(fh); @@ -519,6 +515,16 @@ GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64, } fh = gfh->fh; + + if (gfh->state == RW_STATE_READING) { + int ret = fseek(fh, 0, SEEK_CUR); + if (ret == -1) { + error_setg_errno(errp, errno, "failed to seek file"); + return NULL; + } + gfh->state = RW_STATE_NEW; + } + buf = g_base64_decode(buf_b64, &buf_len); if (!has_count) { @@ -538,6 +544,7 @@ GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64, write_data = g_new0(GuestFileWrite, 1); write_data->count = write_count; write_data->eof = feof(fh); + gfh->state = RW_STATE_WRITING; } g_free(buf); clearerr(fh); @@ -546,25 +553,47 @@ GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64, } struct GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset, - int64_t whence, Error **errp) + int64_t whence_code, Error **errp) { GuestFileHandle *gfh = guest_file_handle_find(handle, errp); GuestFileSeek *seek_data = NULL; FILE *fh; int ret; + int whence; if (!gfh) { return NULL; } + /* We stupidly exposed 'whence':'int' in our qapi */ + switch (whence_code) { + case QGA_SEEK_SET: + whence = SEEK_SET; + break; + case QGA_SEEK_CUR: + whence = SEEK_CUR; + break; + case QGA_SEEK_END: + whence = SEEK_END; + break; + default: + error_setg(errp, "invalid whence code %"PRId64, whence_code); + return NULL; + } + fh = gfh->fh; ret = fseek(fh, offset, whence); if (ret == -1) { error_setg_errno(errp, errno, "failed to seek file"); + if (errno == ESPIPE) { + /* file is non-seekable, stdio shouldn't be buffering anyways */ + gfh->state = RW_STATE_NEW; + } } else { seek_data = g_new0(GuestFileSeek, 1); seek_data->position = ftell(fh); seek_data->eof = feof(fh); + gfh->state = RW_STATE_NEW; } clearerr(fh); @@ -585,6 +614,8 @@ void qmp_guest_file_flush(int64_t handle, Error **errp) ret = fflush(fh); if (ret == EOF) { error_setg_errno(errp, errno, "failed to flush file"); + } else { + gfh->state = RW_STATE_NEW; } } diff --git a/qga/commands-win32.c b/qga/commands-win32.c index d9de23bbb8..0654fe4fe7 100644 --- a/qga/commands-win32.c +++ b/qga/commands-win32.c @@ -59,6 +59,7 @@ static struct { .filehandles = QTAILQ_HEAD_INITIALIZER(guest_file_state.filehandles), }; +#define FILE_GENERIC_APPEND (FILE_GENERIC_WRITE & ~FILE_WRITE_DATA) typedef struct OpenFlags { const char *forms; @@ -66,20 +67,20 @@ typedef struct OpenFlags { DWORD creation_disposition; } OpenFlags; static OpenFlags guest_file_open_modes[] = { - {"r", GENERIC_READ, OPEN_EXISTING}, - {"rb", GENERIC_READ, OPEN_EXISTING}, - {"w", GENERIC_WRITE, CREATE_ALWAYS}, - {"wb", GENERIC_WRITE, CREATE_ALWAYS}, - {"a", GENERIC_WRITE, OPEN_ALWAYS }, - {"r+", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, - {"rb+", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, - {"r+b", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, - {"w+", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, - {"wb+", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, - {"w+b", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, - {"a+", GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS }, - {"ab+", GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS }, - {"a+b", GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS } + {"r", GENERIC_READ, OPEN_EXISTING}, + {"rb", GENERIC_READ, OPEN_EXISTING}, + {"w", GENERIC_WRITE, CREATE_ALWAYS}, + {"wb", GENERIC_WRITE, CREATE_ALWAYS}, + {"a", FILE_GENERIC_APPEND, OPEN_ALWAYS }, + {"r+", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, + {"rb+", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, + {"r+b", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, + {"w+", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, + {"wb+", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, + {"w+b", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, + {"a+", FILE_GENERIC_APPEND|GENERIC_READ, OPEN_ALWAYS }, + {"ab+", FILE_GENERIC_APPEND|GENERIC_READ, OPEN_ALWAYS }, + {"a+b", FILE_GENERIC_APPEND|GENERIC_READ, OPEN_ALWAYS } }; static OpenFlags *find_open_flag(const char *mode_str) @@ -128,6 +129,28 @@ static GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp) return NULL; } +static void handle_set_nonblocking(HANDLE fh) +{ + DWORD file_type, pipe_state; + file_type = GetFileType(fh); + if (file_type != FILE_TYPE_PIPE) { + return; + } + /* If file_type == FILE_TYPE_PIPE, according to MSDN + * the specified file is socket or named pipe */ + if (!GetNamedPipeHandleState(fh, &pipe_state, NULL, + NULL, NULL, NULL, 0)) { + return; + } + /* The fd is named pipe fd */ + if (pipe_state & PIPE_NOWAIT) { + return; + } + + pipe_state |= PIPE_NOWAIT; + SetNamedPipeHandleState(fh, &pipe_state, NULL, NULL); +} + int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode, Error **errp) { @@ -158,9 +181,14 @@ int64_t qmp_guest_file_open(const char *path, bool has_mode, return -1; } + /* set fd non-blocking to avoid common use cases (like reading from a + * named pipe) from hanging the agent + */ + handle_set_nonblocking(fh); + fd = guest_file_handle_add(fh, errp); if (fd < 0) { - CloseHandle(&fh); + CloseHandle(fh); error_setg(errp, "failed to add handle to qmp handle table"); return -1; } @@ -354,7 +382,7 @@ done: } GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset, - int64_t whence, Error **errp) + int64_t whence_code, Error **errp) { GuestFileHandle *gfh; GuestFileSeek *seek_data; @@ -362,11 +390,29 @@ GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset, LARGE_INTEGER new_pos, off_pos; off_pos.QuadPart = offset; BOOL res; + int whence; + gfh = guest_file_handle_find(handle, errp); if (!gfh) { return NULL; } + /* We stupidly exposed 'whence':'int' in our qapi */ + switch (whence_code) { + case QGA_SEEK_SET: + whence = SEEK_SET; + break; + case QGA_SEEK_CUR: + whence = SEEK_CUR; + break; + case QGA_SEEK_END: + whence = SEEK_END; + break; + default: + error_setg(errp, "invalid whence code %"PRId64, whence_code); + return NULL; + } + fh = gfh->fh; res = SetFilePointerEx(fh, off_pos, &new_pos, whence); if (!res) { diff --git a/qga/commands.c b/qga/commands.c index 0f80ce65a4..bb73e7dfbf 100644 --- a/qga/commands.c +++ b/qga/commands.c @@ -398,9 +398,12 @@ GuestExec *qmp_guest_exec(const char *path, arglist.next = has_arg ? arg : NULL; argv = guest_exec_get_args(&arglist, true); - envp = guest_exec_get_args(has_env ? env : NULL, false); + envp = has_env ? guest_exec_get_args(env, false) : NULL; flags = G_SPAWN_SEARCH_PATH | G_SPAWN_DO_NOT_REAP_CHILD; +#if GLIB_CHECK_VERSION(2, 33, 2) + flags |= G_SPAWN_SEARCH_PATH_FROM_ENVP; +#endif if (!has_output) { flags |= G_SPAWN_STDOUT_TO_DEV_NULL | G_SPAWN_STDERR_TO_DEV_NULL; } diff --git a/qga/guest-agent-core.h b/qga/guest-agent-core.h index e92c6abafb..238dc6b08d 100644 --- a/qga/guest-agent-core.h +++ b/qga/guest-agent-core.h @@ -15,6 +15,13 @@ #define QGA_READ_COUNT_DEFAULT 4096 +/* Mapping of whence codes used by guest-file-seek. */ +enum { + QGA_SEEK_SET = 0, + QGA_SEEK_CUR = 1, + QGA_SEEK_END = 2, +}; + typedef struct GAState GAState; typedef struct GACommandState GACommandState; extern GAState *ga_state; diff --git a/qga/installer/qemu-ga.wxs b/qga/installer/qemu-ga.wxs index 6804f0279f..9473875723 100644 --- a/qga/installer/qemu-ga.wxs +++ b/qga/installer/qemu-ga.wxs @@ -91,6 +91,22 @@ <File Id="qga_vss.tlb" Name="qga-vss.tlb" Source="$(env.BUILD_DIR)/qga/vss-win32/qga-vss.tlb" KeyPath="yes" DiskId="1"/> </Component> <?endif?> + <?if $(var.Arch) = "32"?> + <Component Id="gspawn-helper-console" Guid="{446185B3-87BE-43D2-96B8-0FEFD9E8696D}"> + <File Id="gspawn-win32-helper-console.exe" Name="gspawn-win32-helper-console.exe" Source="$(var.Mingw_bin)/gspawn-win32-helper-console.exe" KeyPath="yes" DiskId="1"/> + </Component> + <Component Id="gspawn-helper" Guid="{CD67A5A3-2DB1-4DA1-A67A-8D71E797B466}"> + <File Id="gspawn-win32-helper.exe" Name="gspawn-win32-helper.exe" Source="$(var.Mingw_bin)/gspawn-win32-helper.exe" KeyPath="yes" DiskId="1"/> + </Component> + <?endif?> + <?if $(var.Arch) = "64"?> + <Component Id="gspawn-helper-console" Guid="{9E615A9F-349A-4992-A5C2-C10BAD173660}"> + <File Id="gspawn-win64-helper-console.exe" Name="gspawn-win64-helper-console.exe" Source="$(var.Mingw_bin)/gspawn-win64-helper-console.exe" KeyPath="yes" DiskId="1"/> + </Component> + <Component Id="gspawn-helper" Guid="{D201AD22-1846-4E4F-B6E1-C7A908ED2457}"> + <File Id="gspawn-win64-helper.exe" Name="gspawn-win64-helper.exe" Source="$(var.Mingw_bin)/gspawn-win64-helper.exe" KeyPath="yes" DiskId="1"/> + </Component> + <?endif?> <Component Id="iconv" Guid="{35EE3558-D34B-4F0A-B8BD-430FF0775246}"> <File Id="iconv.dll" Name="iconv.dll" Source="$(var.Mingw_bin)/iconv.dll" KeyPath="yes" DiskId="1"/> </Component> @@ -148,6 +164,8 @@ <ComponentRef Id="qga_vss_dll" /> <ComponentRef Id="qga_vss_tlb" /> <?endif?> + <ComponentRef Id="gspawn-helper-console" /> + <ComponentRef Id="gspawn-helper" /> <ComponentRef Id="iconv" /> <ComponentRef Id="libgcc_arch_lib" /> <ComponentRef Id="libglib" /> diff --git a/qga/main.c b/qga/main.c index 068169fcbc..f83a97d245 100644 --- a/qga/main.c +++ b/qga/main.c @@ -570,10 +570,9 @@ static void process_command(GAState *s, QDict *req) } /* handle requests/control events coming in over the channel */ -static void process_event(JSONMessageParser *parser, QList *tokens) +static void process_event(JSONMessageParser *parser, GQueue *tokens) { GAState *s = container_of(parser, GAState, parser); - QObject *obj; QDict *qdict; Error *err = NULL; int ret; @@ -581,9 +580,9 @@ static void process_event(JSONMessageParser *parser, QList *tokens) g_assert(s && parser); g_debug("process_event: called"); - obj = json_parser_parse_err(tokens, NULL, &err); - if (err || !obj || qobject_type(obj) != QTYPE_QDICT) { - qobject_decref(obj); + qdict = qobject_to_qdict(json_parser_parse_err(tokens, NULL, &err)); + if (err || !qdict) { + QDECREF(qdict); qdict = qdict_new(); if (!err) { g_warning("failed to parse event: unknown error"); @@ -593,12 +592,8 @@ static void process_event(JSONMessageParser *parser, QList *tokens) } qdict_put_obj(qdict, "error", qmp_build_error_object(err)); error_free(err); - } else { - qdict = qobject_to_qdict(obj); } - g_assert(qdict); - /* handle host->guest commands */ if (qdict_haskey(qdict, "execute")) { process_command(s, qdict); diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index 78362e071d..01c9ee48d8 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -318,13 +318,13 @@ # # Seek to a position in the file, as with fseek(), and return the # current file position afterward. Also encapsulates ftell()'s -# functionality, just Set offset=0, whence=SEEK_CUR. +# functionality, with offset=0 and whence=1. # # @handle: filehandle returned by guest-file-open # # @offset: bytes to skip over in the file stream # -# @whence: SEEK_SET, SEEK_CUR, or SEEK_END, as with fseek() +# @whence: 0 for SEEK_SET, 1 for SEEK_CUR, or 2 for SEEK_END # # Returns: @GuestFileSeek on success. # diff --git a/qmp-commands.hx b/qmp-commands.hx index d7cf0ff264..9d8b42f59a 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -718,6 +718,25 @@ Example: EQMP { + .name = "migrate-start-postcopy", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_migrate_start_postcopy, + }, + +SQMP +migrate-start-postcopy +---------------------- + +Switch an in-progress migration to postcopy mode. Ignored after the end of +migration (or once already in postcopy). + +Example: +-> { "execute": "migrate-start-postcopy" } +<- { "return": {} } + +EQMP + + { .name = "query-migrate-cache-size", .args_type = "", .mhandler.cmd_new = qmp_marshal_query_migrate_cache_size, @@ -1262,7 +1281,7 @@ EQMP }, { .name = "transaction", - .args_type = "actions:q", + .args_type = "actions:q,properties:q?", .mhandler.cmd_new = qmp_marshal_transaction, }, @@ -1476,6 +1495,44 @@ Example: EQMP { + .name = "blockdev-snapshot", + .args_type = "node:s,overlay:s", + .mhandler.cmd_new = qmp_marshal_blockdev_snapshot, + }, + +SQMP +blockdev-snapshot +----------------- +Since 2.5 + +Create a snapshot, by installing 'node' as the backing image of +'overlay'. Additionally, if 'node' is associated with a block +device, the block device changes to using 'overlay' as its new active +image. + +Arguments: + +- "node": device that will have a snapshot created (json-string) +- "overlay": device that will have 'node' as its backing image (json-string) + +Example: + +-> { "execute": "blockdev-add", + "arguments": { "options": { "driver": "qcow2", + "node-name": "node1534", + "file": { "driver": "file", + "filename": "hd1.qcow2" }, + "backing": "" } } } + +<- { "return": {} } + +-> { "execute": "blockdev-snapshot", "arguments": { "node": "ide-hd0", + "overlay": "node1534" } } +<- { "return": {} } + +EQMP + + { .name = "blockdev-snapshot-internal-sync", .args_type = "device:B,name:s", .mhandler.cmd_new = qmp_marshal_blockdev_snapshot_internal_sync, @@ -2526,6 +2583,64 @@ Each json-object contain the following: another request (json-int) - "wr_merged": number of write requests that have been merged into another request (json-int) + - "idle_time_ns": time since the last I/O operation, in + nanoseconds. If the field is absent it means + that there haven't been any operations yet + (json-int, optional) + - "failed_rd_operations": number of failed read operations + (json-int) + - "failed_wr_operations": number of failed write operations + (json-int) + - "failed_flush_operations": number of failed flush operations + (json-int) + - "invalid_rd_operations": number of invalid read operations + (json-int) + - "invalid_wr_operations": number of invalid write operations + (json-int) + - "invalid_flush_operations": number of invalid flush operations + (json-int) + - "account_invalid": whether invalid operations are included in + the last access statistics (json-bool) + - "account_failed": whether failed operations are included in the + latency and last access statistics + (json-bool) + - "timed_stats": A json-array containing statistics collected in + specific intervals, with the following members: + - "interval_length": interval used for calculating the + statistics, in seconds (json-int) + - "min_rd_latency_ns": minimum latency of read operations in + the defined interval, in nanoseconds + (json-int) + - "min_wr_latency_ns": minimum latency of write operations in + the defined interval, in nanoseconds + (json-int) + - "min_flush_latency_ns": minimum latency of flush operations + in the defined interval, in + nanoseconds (json-int) + - "max_rd_latency_ns": maximum latency of read operations in + the defined interval, in nanoseconds + (json-int) + - "max_wr_latency_ns": maximum latency of write operations in + the defined interval, in nanoseconds + (json-int) + - "max_flush_latency_ns": maximum latency of flush operations + in the defined interval, in + nanoseconds (json-int) + - "avg_rd_latency_ns": average latency of read operations in + the defined interval, in nanoseconds + (json-int) + - "avg_wr_latency_ns": average latency of write operations in + the defined interval, in nanoseconds + (json-int) + - "avg_flush_latency_ns": average latency of flush operations + in the defined interval, in + nanoseconds (json-int) + - "avg_rd_queue_depth": average number of pending read + operations in the defined interval + (json-number) + - "avg_wr_queue_depth": average number of pending write + operations in the defined interval + (json-number). - "parent": Contains recursively the statistics of the underlying protocol (e.g. the host file for a qcow2 image). If there is no underlying protocol, this field is omitted @@ -2550,7 +2665,10 @@ Example: "flush_total_times_ns":49653 "flush_operations":61, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "idle_time_ns":2953431879, + "account_invalid":true, + "account_failed":false } }, "stats":{ @@ -2564,7 +2682,10 @@ Example: "rd_total_times_ns":3465673657 "flush_total_times_ns":49653, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "idle_time_ns":2953431879, + "account_invalid":true, + "account_failed":false } }, { @@ -2580,7 +2701,9 @@ Example: "rd_total_times_ns":0 "flush_total_times_ns":0, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "account_invalid":false, + "account_failed":false } }, { @@ -2596,7 +2719,9 @@ Example: "rd_total_times_ns":0 "flush_total_times_ns":0, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "account_invalid":false, + "account_failed":false } }, { @@ -2612,7 +2737,9 @@ Example: "rd_total_times_ns":0 "flush_total_times_ns":0, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "account_invalid":false, + "account_failed":false } } ] @@ -3889,8 +4016,8 @@ blockdev-add Add a block device. This command is still a work in progress. It doesn't support all -block drivers, it lacks a matching blockdev-del, and more. Stay away -from it unless you want to help with its development. +block drivers among other things. Stay away from it unless you want +to help with its development. Arguments: @@ -3936,6 +4063,228 @@ Example (2): EQMP { + .name = "x-blockdev-del", + .args_type = "id:s?,node-name:s?", + .mhandler.cmd_new = qmp_marshal_x_blockdev_del, + }, + +SQMP +x-blockdev-del +------------ +Since 2.5 + +Deletes a block device thas has been added using blockdev-add. +The selected device can be either a block backend or a graph node. + +In the former case the backend will be destroyed, along with its +inserted medium if there's any. The command will fail if the backend +or its medium are in use. + +In the latter case the node will be destroyed. The command will fail +if the node is attached to a block backend or is otherwise being +used. + +One of "id" or "node-name" must be specified, but not both. + +This command is still a work in progress and is considered +experimental. Stay away from it unless you want to help with its +development. + +Arguments: + +- "id": Name of the block backend device to delete (json-string, optional) +- "node-name": Name of the graph node to delete (json-string, optional) + +Example: + +-> { "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "qcow2", + "id": "drive0", + "file": { + "driver": "file", + "filename": "test.qcow2" + } + } + } + } + +<- { "return": {} } + +-> { "execute": "x-blockdev-del", + "arguments": { "id": "drive0" } + } +<- { "return": {} } + +EQMP + + { + .name = "blockdev-open-tray", + .args_type = "device:s,force:b?", + .mhandler.cmd_new = qmp_marshal_blockdev_open_tray, + }, + +SQMP +blockdev-open-tray +------------------ + +Opens a block device's tray. If there is a block driver state tree inserted as a +medium, it will become inaccessible to the guest (but it will remain associated +to the block device, so closing the tray will make it accessible again). + +If the tray was already open before, this will be a no-op. + +Once the tray opens, a DEVICE_TRAY_MOVED event is emitted. There are cases in +which no such event will be generated, these include: +- if the guest has locked the tray, @force is false and the guest does not + respond to the eject request +- if the BlockBackend denoted by @device does not have a guest device attached + to it +- if the guest device does not have an actual tray and is empty, for instance + for floppy disk drives + +Arguments: + +- "device": block device name (json-string) +- "force": if false (the default), an eject request will be sent to the guest if + it has locked the tray (and the tray will not be opened immediately); + if true, the tray will be opened regardless of whether it is locked + (json-bool, optional) + +Example: + +-> { "execute": "blockdev-open-tray", + "arguments": { "device": "ide1-cd0" } } + +<- { "timestamp": { "seconds": 1418751016, + "microseconds": 716996 }, + "event": "DEVICE_TRAY_MOVED", + "data": { "device": "ide1-cd0", + "tray-open": true } } + +<- { "return": {} } + +EQMP + + { + .name = "blockdev-close-tray", + .args_type = "device:s", + .mhandler.cmd_new = qmp_marshal_blockdev_close_tray, + }, + +SQMP +blockdev-close-tray +------------------- + +Closes a block device's tray. If there is a block driver state tree associated +with the block device (which is currently ejected), that tree will be loaded as +the medium. + +If the tray was already closed before, this will be a no-op. + +Arguments: + +- "device": block device name (json-string) + +Example: + +-> { "execute": "blockdev-close-tray", + "arguments": { "device": "ide1-cd0" } } + +<- { "timestamp": { "seconds": 1418751345, + "microseconds": 272147 }, + "event": "DEVICE_TRAY_MOVED", + "data": { "device": "ide1-cd0", + "tray-open": false } } + +<- { "return": {} } + +EQMP + + { + .name = "blockdev-remove-medium", + .args_type = "device:s", + .mhandler.cmd_new = qmp_marshal_blockdev_remove_medium, + }, + +SQMP +blockdev-remove-medium +---------------------- + +Removes a medium (a block driver state tree) from a block device. That block +device's tray must currently be open (unless there is no attached guest device). + +If the tray is open and there is no medium inserted, this will be a no-op. + +Arguments: + +- "device": block device name (json-string) + +Example: + +-> { "execute": "blockdev-remove-medium", + "arguments": { "device": "ide1-cd0" } } + +<- { "error": { "class": "GenericError", + "desc": "Tray of device 'ide1-cd0' is not open" } } + +-> { "execute": "blockdev-open-tray", + "arguments": { "device": "ide1-cd0" } } + +<- { "timestamp": { "seconds": 1418751627, + "microseconds": 549958 }, + "event": "DEVICE_TRAY_MOVED", + "data": { "device": "ide1-cd0", + "tray-open": true } } + +<- { "return": {} } + +-> { "execute": "blockdev-remove-medium", + "arguments": { "device": "ide1-cd0" } } + +<- { "return": {} } + +EQMP + + { + .name = "blockdev-insert-medium", + .args_type = "device:s,node-name:s", + .mhandler.cmd_new = qmp_marshal_blockdev_insert_medium, + }, + +SQMP +blockdev-insert-medium +---------------------- + +Inserts a medium (a block driver state tree) into a block device. That block +device's tray must currently be open (unless there is no attached guest device) +and there must be no medium inserted already. + +Arguments: + +- "device": block device name (json-string) +- "node-name": root node of the BDS tree to insert into the block device + +Example: + +-> { "execute": "blockdev-add", + "arguments": { "options": { "node-name": "node0", + "driver": "raw", + "file": { "driver": "file", + "filename": "fedora.iso" } } } } + +<- { "return": {} } + +-> { "execute": "blockdev-insert-medium", + "arguments": { "device": "ide1-cd0", + "node-name": "node0" } } + +<- { "return": {} } + +EQMP + + { .name = "query-named-block-nodes", .args_type = "", .mhandler.cmd_new = qmp_marshal_query_named_block_nodes, @@ -3998,6 +4347,59 @@ Example: EQMP { + .name = "blockdev-change-medium", + .args_type = "device:B,filename:F,format:s?,read-only-mode:s?", + .mhandler.cmd_new = qmp_marshal_blockdev_change_medium, + }, + +SQMP +blockdev-change-medium +---------------------- + +Changes the medium inserted into a block device by ejecting the current medium +and loading a new image file which is inserted as the new medium. + +Arguments: + +- "device": device name (json-string) +- "filename": filename of the new image (json-string) +- "format": format of the new image (json-string, optional) +- "read-only-mode": new read-only mode (json-string, optional) + - Possible values: "retain" (default), "read-only", "read-write" + +Examples: + +1. Change a removable medium + +-> { "execute": "blockdev-change-medium", + "arguments": { "device": "ide1-cd0", + "filename": "/srv/images/Fedora-12-x86_64-DVD.iso", + "format": "raw" } } +<- { "return": {} } + +2. Load a read-only medium into a writable drive + +-> { "execute": "blockdev-change-medium", + "arguments": { "device": "isa-fd0", + "filename": "/srv/images/ro.img", + "format": "raw", + "read-only-mode": "retain" } } + +<- { "error": + { "class": "GenericError", + "desc": "Could not open '/srv/images/ro.img': Permission denied" } } + +-> { "execute": "blockdev-change-medium", + "arguments": { "device": "isa-fd0", + "filename": "/srv/images/ro.img", + "format": "raw", + "read-only-mode": "read-only" } } + +<- { "return": {} } + +EQMP + + { .name = "query-memdev", .args_type = "", .mhandler.cmd_new = qmp_marshal_query_memdev, @@ -210,6 +210,7 @@ ObjectPropertyInfoList *qmp_qom_list(const char *path, Error **errp) bool ambiguous = false; ObjectPropertyInfoList *props = NULL; ObjectProperty *prop; + ObjectPropertyIterator *iter; obj = object_resolve_path(path, &ambiguous); if (obj == NULL) { @@ -222,7 +223,8 @@ ObjectPropertyInfoList *qmp_qom_list(const char *path, Error **errp) return NULL; } - QTAILQ_FOREACH(prop, &obj->properties, node) { + iter = object_property_iter_init(obj); + while ((prop = object_property_iter_next(iter))) { ObjectPropertyInfoList *entry = g_malloc0(sizeof(*entry)); entry->value = g_malloc0(sizeof(ObjectPropertyInfo)); @@ -232,6 +234,7 @@ ObjectPropertyInfoList *qmp_qom_list(const char *path, Error **errp) entry->value->name = g_strdup(prop->name); entry->value->type = g_strdup(prop->type); } + object_property_iter_free(iter); return props; } @@ -414,7 +417,8 @@ void qmp_change(const char *device, const char *target, if (strcmp(device, "vnc") == 0) { qmp_change_vnc(target, has_arg, arg, errp); } else { - qmp_change_blockdev(device, target, arg, errp); + qmp_blockdev_change_medium(device, target, has_arg, arg, false, 0, + errp); } } @@ -502,6 +506,7 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename, ObjectClass *klass; Object *obj; ObjectProperty *prop; + ObjectPropertyIterator *iter; DevicePropertyInfoList *prop_list = NULL; klass = object_class_by_name(typename); @@ -530,7 +535,8 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename, obj = object_new(typename); - QTAILQ_FOREACH(prop, &obj->properties, node) { + iter = object_property_iter_init(obj); + while ((prop = object_property_iter_next(iter))) { DevicePropertyInfo *info; DevicePropertyInfoList *entry; @@ -561,6 +567,7 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename, entry->next = prop_list; prop_list = entry; } + object_property_iter_free(iter); object_unref(obj); diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index b19623e229..92798ae3a4 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -11,12 +11,9 @@ * */ -#include "qapi/qmp/qstring.h" -#include "qapi/qmp/qlist.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qmp/qint.h" #include "qemu-common.h" #include "qapi/qmp/json-lexer.h" +#include <stdint.h> #define MAX_TOKEN_SIZE (64ULL << 20) @@ -30,7 +27,7 @@ */ enum json_lexer_state { - IN_ERROR = 0, + IN_ERROR = 0, /* must really be 0, see json_lexer[] */ IN_DQ_UCODE3, IN_DQ_UCODE2, IN_DQ_UCODE1, @@ -62,6 +59,8 @@ enum json_lexer_state { IN_START, }; +QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START); + #define TERMINAL(state) [0 ... 0x7F] = (state) /* Return whether TERMINAL is a terminal state and the transition to it @@ -71,6 +70,8 @@ enum json_lexer_state { (json_lexer[(old_state)][0] == (terminal)) static const uint8_t json_lexer[][256] = { + /* Relies on default initialization to IN_ERROR! */ + /* double quote string */ [IN_DQ_UCODE3] = { ['0' ... '9'] = IN_DQ_STRING, @@ -253,12 +254,12 @@ static const uint8_t json_lexer[][256] = { ['0'] = IN_ZERO, ['1' ... '9'] = IN_NONZERO_NUMBER, ['-'] = IN_NEG_NONZERO_NUMBER, - ['{'] = JSON_OPERATOR, - ['}'] = JSON_OPERATOR, - ['['] = JSON_OPERATOR, - [']'] = JSON_OPERATOR, - [','] = JSON_OPERATOR, - [':'] = JSON_OPERATOR, + ['{'] = JSON_LCURLY, + ['}'] = JSON_RCURLY, + ['['] = JSON_LSQUARE, + [']'] = JSON_RSQUARE, + [','] = JSON_COMMA, + [':'] = JSON_COLON, ['a' ... 'z'] = IN_KEYWORD, ['%'] = IN_ESCAPE, [' '] = IN_WHITESPACE, @@ -272,7 +273,7 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) { lexer->emit = func; lexer->state = IN_START; - lexer->token = qstring_new(); + lexer->token = g_string_sized_new(3); lexer->x = lexer->y = 0; } @@ -287,14 +288,20 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) } do { + assert(lexer->state <= ARRAY_SIZE(json_lexer)); new_state = json_lexer[lexer->state][(uint8_t)ch]; char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state); if (char_consumed) { - qstring_append_chr(lexer->token, ch); + g_string_append_c(lexer->token, ch); } switch (new_state) { - case JSON_OPERATOR: + case JSON_LCURLY: + case JSON_RCURLY: + case JSON_LSQUARE: + case JSON_RSQUARE: + case JSON_COLON: + case JSON_COMMA: case JSON_ESCAPE: case JSON_INTEGER: case JSON_FLOAT: @@ -303,8 +310,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y); /* fall through */ case JSON_SKIP: - QDECREF(lexer->token); - lexer->token = qstring_new(); + g_string_truncate(lexer->token, 0); new_state = IN_START; break; case IN_ERROR: @@ -322,8 +328,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) * induce an error/flush state. */ lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y); - QDECREF(lexer->token); - lexer->token = qstring_new(); + g_string_truncate(lexer->token, 0); new_state = IN_START; lexer->state = new_state; return 0; @@ -336,10 +341,9 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) /* Do not let a single token grow to an arbitrarily large size, * this is a security consideration. */ - if (lexer->token->length > MAX_TOKEN_SIZE) { + if (lexer->token->len > MAX_TOKEN_SIZE) { lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y); - QDECREF(lexer->token); - lexer->token = qstring_new(); + g_string_truncate(lexer->token, 0); lexer->state = IN_START; } @@ -369,5 +373,5 @@ int json_lexer_flush(JSONLexer *lexer) void json_lexer_destroy(JSONLexer *lexer) { - QDECREF(lexer->token); + g_string_free(lexer->token, true); } diff --git a/qobject/json-parser.c b/qobject/json-parser.c index ac991ba3cf..3c5d35d7b0 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -22,15 +22,13 @@ #include "qapi/qmp/qbool.h" #include "qapi/qmp/json-parser.h" #include "qapi/qmp/json-lexer.h" +#include "qapi/qmp/json-streamer.h" typedef struct JSONParserContext { Error *err; - struct { - QObject **buf; - size_t pos; - size_t count; - } tokens; + JSONToken *current; + GQueue *buf; } JSONParserContext; #define BUG_ON(cond) assert(!(cond)) @@ -47,58 +45,10 @@ typedef struct JSONParserContext static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); /** - * Token manipulators - * - * tokens are dictionaries that contain a type, a string value, and geometry information - * about a token identified by the lexer. These are routines that make working with - * these objects a bit easier. - */ -static const char *token_get_value(QObject *obj) -{ - return qdict_get_str(qobject_to_qdict(obj), "token"); -} - -static JSONTokenType token_get_type(QObject *obj) -{ - return qdict_get_int(qobject_to_qdict(obj), "type"); -} - -static int token_is_operator(QObject *obj, char op) -{ - const char *val; - - if (token_get_type(obj) != JSON_OPERATOR) { - return 0; - } - - val = token_get_value(obj); - - return (val[0] == op) && (val[1] == 0); -} - -static int token_is_keyword(QObject *obj, const char *value) -{ - if (token_get_type(obj) != JSON_KEYWORD) { - return 0; - } - - return strcmp(token_get_value(obj), value) == 0; -} - -static int token_is_escape(QObject *obj, const char *value) -{ - if (token_get_type(obj) != JSON_ESCAPE) { - return 0; - } - - return (strcmp(token_get_value(obj), value) == 0); -} - -/** * Error handler */ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, - QObject *token, const char *msg, ...) + JSONToken *token, const char *msg, ...) { va_list ap; char message[1024]; @@ -176,9 +126,10 @@ static int hex2decimal(char ch) * \t * \u four-hex-digits */ -static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) +static QString *qstring_from_escaped_str(JSONParserContext *ctxt, + JSONToken *token) { - const char *ptr = token_get_value(token); + const char *ptr = token->str; QString *str; int double_quote = 1; @@ -274,73 +225,34 @@ out: return NULL; } -static QObject *parser_context_pop_token(JSONParserContext *ctxt) -{ - QObject *token; - g_assert(ctxt->tokens.pos < ctxt->tokens.count); - token = ctxt->tokens.buf[ctxt->tokens.pos]; - ctxt->tokens.pos++; - return token; -} - -/* Note: parser_context_{peek|pop}_token do not increment the - * token object's refcount. In both cases the references will continue - * to be tracked and cleaned up in parser_context_free(), so do not - * attempt to free the token object. +/* Note: the token object returned by parser_context_peek_token or + * parser_context_pop_token is deleted as soon as parser_context_pop_token + * is called again. */ -static QObject *parser_context_peek_token(JSONParserContext *ctxt) -{ - QObject *token; - g_assert(ctxt->tokens.pos < ctxt->tokens.count); - token = ctxt->tokens.buf[ctxt->tokens.pos]; - return token; -} - -static JSONParserContext parser_context_save(JSONParserContext *ctxt) -{ - JSONParserContext saved_ctxt = {0}; - saved_ctxt.tokens.pos = ctxt->tokens.pos; - saved_ctxt.tokens.count = ctxt->tokens.count; - saved_ctxt.tokens.buf = ctxt->tokens.buf; - return saved_ctxt; -} - -static void parser_context_restore(JSONParserContext *ctxt, - JSONParserContext saved_ctxt) +static JSONToken *parser_context_pop_token(JSONParserContext *ctxt) { - ctxt->tokens.pos = saved_ctxt.tokens.pos; - ctxt->tokens.count = saved_ctxt.tokens.count; - ctxt->tokens.buf = saved_ctxt.tokens.buf; + g_free(ctxt->current); + assert(!g_queue_is_empty(ctxt->buf)); + ctxt->current = g_queue_pop_head(ctxt->buf); + return ctxt->current; } -static void tokens_append_from_iter(QObject *obj, void *opaque) +static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) { - JSONParserContext *ctxt = opaque; - g_assert(ctxt->tokens.pos < ctxt->tokens.count); - ctxt->tokens.buf[ctxt->tokens.pos++] = obj; - qobject_incref(obj); + assert(!g_queue_is_empty(ctxt->buf)); + return g_queue_peek_head(ctxt->buf); } -static JSONParserContext *parser_context_new(QList *tokens) +static JSONParserContext *parser_context_new(GQueue *tokens) { JSONParserContext *ctxt; - size_t count; if (!tokens) { return NULL; } - count = qlist_size(tokens); - if (count == 0) { - return NULL; - } - ctxt = g_malloc0(sizeof(JSONParserContext)); - ctxt->tokens.pos = 0; - ctxt->tokens.count = count; - ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); - qlist_iter(tokens, tokens_append_from_iter, ctxt); - ctxt->tokens.pos = 0; + ctxt->buf = tokens; return ctxt; } @@ -348,12 +260,12 @@ static JSONParserContext *parser_context_new(QList *tokens) /* to support error propagation, ctxt->err must be freed separately */ static void parser_context_free(JSONParserContext *ctxt) { - int i; if (ctxt) { - for (i = 0; i < ctxt->tokens.count; i++) { - qobject_decref(ctxt->tokens.buf[i]); + while (!g_queue_is_empty(ctxt->buf)) { + parser_context_pop_token(ctxt); } - g_free(ctxt->tokens.buf); + g_free(ctxt->current); + g_queue_free(ctxt->buf); g_free(ctxt); } } @@ -363,8 +275,8 @@ static void parser_context_free(JSONParserContext *ctxt) */ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) { - QObject *key = NULL, *token = NULL, *value, *peek; - JSONParserContext saved_ctxt = parser_context_save(ctxt); + QObject *key = NULL, *value; + JSONToken *peek, *token; peek = parser_context_peek_token(ctxt); if (peek == NULL) { @@ -384,7 +296,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - if (!token_is_operator(token, ':')) { + if (token->type != JSON_COLON) { parse_error(ctxt, token, "missing : in object pair"); goto out; } @@ -402,7 +314,6 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) return 0; out: - parser_context_restore(ctxt, saved_ctxt); qobject_decref(key); return -1; @@ -411,17 +322,10 @@ out: static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) { QDict *dict = NULL; - QObject *token, *peek; - JSONParserContext saved_ctxt = parser_context_save(ctxt); + JSONToken *token, *peek; token = parser_context_pop_token(ctxt); - if (token == NULL) { - goto out; - } - - if (!token_is_operator(token, '{')) { - goto out; - } + assert(token && token->type == JSON_LCURLY); dict = qdict_new(); @@ -431,7 +335,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) goto out; } - if (!token_is_operator(peek, '}')) { + if (peek->type != JSON_RCURLY) { if (parse_pair(ctxt, dict, ap) == -1) { goto out; } @@ -442,8 +346,8 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) goto out; } - while (!token_is_operator(token, '}')) { - if (!token_is_operator(token, ',')) { + while (token->type != JSON_RCURLY) { + if (token->type != JSON_COMMA) { parse_error(ctxt, token, "expected separator in dict"); goto out; } @@ -465,7 +369,6 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) return QOBJECT(dict); out: - parser_context_restore(ctxt, saved_ctxt); QDECREF(dict); return NULL; } @@ -473,17 +376,10 @@ out: static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) { QList *list = NULL; - QObject *token, *peek; - JSONParserContext saved_ctxt = parser_context_save(ctxt); + JSONToken *token, *peek; token = parser_context_pop_token(ctxt); - if (token == NULL) { - goto out; - } - - if (!token_is_operator(token, '[')) { - goto out; - } + assert(token && token->type == JSON_LSQUARE); list = qlist_new(); @@ -493,7 +389,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) goto out; } - if (!token_is_operator(peek, ']')) { + if (peek->type != JSON_RSQUARE) { QObject *obj; obj = parse_value(ctxt, ap); @@ -510,8 +406,8 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) goto out; } - while (!token_is_operator(token, ']')) { - if (!token_is_operator(token, ',')) { + while (token->type != JSON_RSQUARE) { + if (token->type != JSON_COMMA) { parse_error(ctxt, token, "expected separator in list"); goto out; } @@ -537,99 +433,68 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) return QOBJECT(list); out: - parser_context_restore(ctxt, saved_ctxt); QDECREF(list); return NULL; } static QObject *parse_keyword(JSONParserContext *ctxt) { - QObject *token, *ret; - JSONParserContext saved_ctxt = parser_context_save(ctxt); + JSONToken *token; token = parser_context_pop_token(ctxt); - if (token == NULL) { - goto out; - } - - if (token_get_type(token) != JSON_KEYWORD) { - goto out; - } + assert(token && token->type == JSON_KEYWORD); - if (token_is_keyword(token, "true")) { - ret = QOBJECT(qbool_from_bool(true)); - } else if (token_is_keyword(token, "false")) { - ret = QOBJECT(qbool_from_bool(false)); - } else if (token_is_keyword(token, "null")) { - ret = qnull(); - } else { - parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token)); - goto out; + if (!strcmp(token->str, "true")) { + return QOBJECT(qbool_from_bool(true)); + } else if (!strcmp(token->str, "false")) { + return QOBJECT(qbool_from_bool(false)); + } else if (!strcmp(token->str, "null")) { + return qnull(); } - - return ret; - -out: - parser_context_restore(ctxt, saved_ctxt); - + parse_error(ctxt, token, "invalid keyword '%s'", token->str); return NULL; } static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) { - QObject *token = NULL, *obj; - JSONParserContext saved_ctxt = parser_context_save(ctxt); + JSONToken *token; if (ap == NULL) { - goto out; + return NULL; } token = parser_context_pop_token(ctxt); - if (token == NULL) { - goto out; - } - - if (token_is_escape(token, "%p")) { - obj = va_arg(*ap, QObject *); - } else if (token_is_escape(token, "%i")) { - obj = QOBJECT(qbool_from_bool(va_arg(*ap, int))); - } else if (token_is_escape(token, "%d")) { - obj = QOBJECT(qint_from_int(va_arg(*ap, int))); - } else if (token_is_escape(token, "%ld")) { - obj = QOBJECT(qint_from_int(va_arg(*ap, long))); - } else if (token_is_escape(token, "%lld") || - token_is_escape(token, "%I64d")) { - obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); - } else if (token_is_escape(token, "%s")) { - obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); - } else if (token_is_escape(token, "%f")) { - obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); - } else { - goto out; + assert(token && token->type == JSON_ESCAPE); + + if (!strcmp(token->str, "%p")) { + return va_arg(*ap, QObject *); + } else if (!strcmp(token->str, "%i")) { + return QOBJECT(qbool_from_bool(va_arg(*ap, int))); + } else if (!strcmp(token->str, "%d")) { + return QOBJECT(qint_from_int(va_arg(*ap, int))); + } else if (!strcmp(token->str, "%ld")) { + return QOBJECT(qint_from_int(va_arg(*ap, long))); + } else if (!strcmp(token->str, "%lld") || + !strcmp(token->str, "%I64d")) { + return QOBJECT(qint_from_int(va_arg(*ap, long long))); + } else if (!strcmp(token->str, "%s")) { + return QOBJECT(qstring_from_str(va_arg(*ap, const char *))); + } else if (!strcmp(token->str, "%f")) { + return QOBJECT(qfloat_from_double(va_arg(*ap, double))); } - - return obj; - -out: - parser_context_restore(ctxt, saved_ctxt); - return NULL; } static QObject *parse_literal(JSONParserContext *ctxt) { - QObject *token, *obj; - JSONParserContext saved_ctxt = parser_context_save(ctxt); + JSONToken *token; token = parser_context_pop_token(ctxt); - if (token == NULL) { - goto out; - } + assert(token); - switch (token_get_type(token)) { + switch (token->type) { case JSON_STRING: - obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); - break; + return QOBJECT(qstring_from_escaped_str(ctxt, token)); case JSON_INTEGER: { /* A possibility exists that this is a whole-valued float where the * fractional part was left out due to being 0 (.0). It's not a big @@ -646,56 +511,55 @@ static QObject *parse_literal(JSONParserContext *ctxt) int64_t value; errno = 0; /* strtoll doesn't set errno on success */ - value = strtoll(token_get_value(token), NULL, 10); + value = strtoll(token->str, NULL, 10); if (errno != ERANGE) { - obj = QOBJECT(qint_from_int(value)); - break; + return QOBJECT(qint_from_int(value)); } /* fall through to JSON_FLOAT */ } case JSON_FLOAT: /* FIXME dependent on locale */ - obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); - break; + return QOBJECT(qfloat_from_double(strtod(token->str, NULL))); default: - goto out; + abort(); } - - return obj; - -out: - parser_context_restore(ctxt, saved_ctxt); - - return NULL; } static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) { - QObject *obj; + JSONToken *token; - obj = parse_object(ctxt, ap); - if (obj == NULL) { - obj = parse_array(ctxt, ap); - } - if (obj == NULL) { - obj = parse_escape(ctxt, ap); - } - if (obj == NULL) { - obj = parse_keyword(ctxt); - } - if (obj == NULL) { - obj = parse_literal(ctxt); + token = parser_context_peek_token(ctxt); + if (token == NULL) { + parse_error(ctxt, NULL, "premature EOI"); + return NULL; } - return obj; + switch (token->type) { + case JSON_LCURLY: + return parse_object(ctxt, ap); + case JSON_LSQUARE: + return parse_array(ctxt, ap); + case JSON_ESCAPE: + return parse_escape(ctxt, ap); + case JSON_INTEGER: + case JSON_FLOAT: + case JSON_STRING: + return parse_literal(ctxt); + case JSON_KEYWORD: + return parse_keyword(ctxt); + default: + parse_error(ctxt, token, "expecting value"); + return NULL; + } } -QObject *json_parser_parse(QList *tokens, va_list *ap) +QObject *json_parser_parse(GQueue *tokens, va_list *ap) { return json_parser_parse_err(tokens, ap, NULL); } -QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) +QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp) { JSONParserContext *ctxt = parser_context_new(tokens); QObject *result; diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index 1b2f9b1d10..a4db4b832e 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -11,50 +11,55 @@ * */ -#include "qapi/qmp/qlist.h" -#include "qapi/qmp/qint.h" -#include "qapi/qmp/qdict.h" #include "qemu-common.h" #include "qapi/qmp/json-lexer.h" #include "qapi/qmp/json-streamer.h" #define MAX_TOKEN_SIZE (64ULL << 20) +#define MAX_TOKEN_COUNT (2ULL << 20) #define MAX_NESTING (1ULL << 10) -static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTokenType type, int x, int y) +static void json_message_free_tokens(JSONMessageParser *parser) +{ + if (parser->tokens) { + g_queue_free(parser->tokens); + parser->tokens = NULL; + } +} + +static void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y) { JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer); - QDict *dict; - - if (type == JSON_OPERATOR) { - switch (qstring_get_str(token)[0]) { - case '{': - parser->brace_count++; - break; - case '}': - parser->brace_count--; - break; - case '[': - parser->bracket_count++; - break; - case ']': - parser->bracket_count--; - break; - default: - break; - } + JSONToken *token; + + switch (type) { + case JSON_LCURLY: + parser->brace_count++; + break; + case JSON_RCURLY: + parser->brace_count--; + break; + case JSON_LSQUARE: + parser->bracket_count++; + break; + case JSON_RSQUARE: + parser->bracket_count--; + break; + default: + break; } - dict = qdict_new(); - qdict_put(dict, "type", qint_from_int(type)); - QINCREF(token); - qdict_put(dict, "token", token); - qdict_put(dict, "x", qint_from_int(x)); - qdict_put(dict, "y", qint_from_int(y)); + token = g_malloc(sizeof(JSONToken) + input->len + 1); + token->type = type; + memcpy(token->str, input->str, input->len); + token->str[input->len] = 0; + token->x = x; + token->y = y; - parser->token_size += token->length; + parser->token_size += input->len; - qlist_append(parser->tokens, dict); + g_queue_push_tail(parser->tokens, token); if (type == JSON_ERROR) { goto out_emit_bad; @@ -64,41 +69,39 @@ static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTok parser->bracket_count == 0)) { goto out_emit; } else if (parser->token_size > MAX_TOKEN_SIZE || - parser->bracket_count > MAX_NESTING || - parser->brace_count > MAX_NESTING) { + g_queue_get_length(parser->tokens) > MAX_TOKEN_COUNT || + parser->bracket_count + parser->brace_count > MAX_NESTING) { /* Security consideration, we limit total memory allocated per object * and the maximum recursion depth that a message can force. */ - goto out_emit; + goto out_emit_bad; } return; out_emit_bad: - /* clear out token list and tell the parser to emit and error + /* + * Clear out token list and tell the parser to emit an error * indication by passing it a NULL list */ - QDECREF(parser->tokens); - parser->tokens = NULL; + json_message_free_tokens(parser); out_emit: /* send current list of tokens to parser and reset tokenizer */ parser->brace_count = 0; parser->bracket_count = 0; + /* parser->emit takes ownership of parser->tokens. */ parser->emit(parser, parser->tokens); - if (parser->tokens) { - QDECREF(parser->tokens); - } - parser->tokens = qlist_new(); + parser->tokens = g_queue_new(); parser->token_size = 0; } void json_message_parser_init(JSONMessageParser *parser, - void (*func)(JSONMessageParser *, QList *)) + void (*func)(JSONMessageParser *, GQueue *)) { parser->emit = func; parser->brace_count = 0; parser->bracket_count = 0; - parser->tokens = qlist_new(); + parser->tokens = g_queue_new(); parser->token_size = 0; json_lexer_init(&parser->lexer, json_message_process_token); @@ -118,5 +121,5 @@ int json_message_parser_flush(JSONMessageParser *parser) void json_message_parser_destroy(JSONMessageParser *parser) { json_lexer_destroy(&parser->lexer); - QDECREF(parser->tokens); + json_message_free_tokens(parser); } diff --git a/qobject/qbool.c b/qobject/qbool.c index 5ff69f0b2d..bc6535fa49 100644 --- a/qobject/qbool.c +++ b/qobject/qbool.c @@ -51,9 +51,9 @@ bool qbool_get_bool(const QBool *qb) */ QBool *qobject_to_qbool(const QObject *obj) { - if (qobject_type(obj) != QTYPE_QBOOL) + if (!obj || qobject_type(obj) != QTYPE_QBOOL) { return NULL; - + } return container_of(obj, QBool, base); } diff --git a/qobject/qdict.c b/qobject/qdict.c index 67b1a58abf..2d67bf1579 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -46,9 +46,9 @@ QDict *qdict_new(void) */ QDict *qobject_to_qdict(const QObject *obj) { - if (qobject_type(obj) != QTYPE_QDICT) + if (!obj || qobject_type(obj) != QTYPE_QDICT) { return NULL; - + } return container_of(obj, QDict, base); } @@ -229,8 +229,7 @@ double qdict_get_double(const QDict *qdict, const char *key) */ int64_t qdict_get_int(const QDict *qdict, const char *key) { - QObject *obj = qdict_get_obj(qdict, key, QTYPE_QINT); - return qint_get_int(qobject_to_qint(obj)); + return qint_get_int(qobject_to_qint(qdict_get(qdict, key))); } /** @@ -243,8 +242,7 @@ int64_t qdict_get_int(const QDict *qdict, const char *key) */ bool qdict_get_bool(const QDict *qdict, const char *key) { - QObject *obj = qdict_get_obj(qdict, key, QTYPE_QBOOL); - return qbool_get_bool(qobject_to_qbool(obj)); + return qbool_get_bool(qobject_to_qbool(qdict_get(qdict, key))); } /** @@ -270,7 +268,7 @@ QList *qdict_get_qlist(const QDict *qdict, const char *key) */ QDict *qdict_get_qdict(const QDict *qdict, const char *key) { - return qobject_to_qdict(qdict_get_obj(qdict, key, QTYPE_QDICT)); + return qobject_to_qdict(qdict_get(qdict, key)); } /** @@ -284,8 +282,7 @@ QDict *qdict_get_qdict(const QDict *qdict, const char *key) */ const char *qdict_get_str(const QDict *qdict, const char *key) { - QObject *obj = qdict_get_obj(qdict, key, QTYPE_QSTRING); - return qstring_get_str(qobject_to_qstring(obj)); + return qstring_get_str(qobject_to_qstring(qdict_get(qdict, key))); } /** @@ -298,13 +295,9 @@ const char *qdict_get_str(const QDict *qdict, const char *key) int64_t qdict_get_try_int(const QDict *qdict, const char *key, int64_t def_value) { - QObject *obj; + QInt *qint = qobject_to_qint(qdict_get(qdict, key)); - obj = qdict_get(qdict, key); - if (!obj || qobject_type(obj) != QTYPE_QINT) - return def_value; - - return qint_get_int(qobject_to_qint(obj)); + return qint ? qint_get_int(qint) : def_value; } /** @@ -316,13 +309,9 @@ int64_t qdict_get_try_int(const QDict *qdict, const char *key, */ bool qdict_get_try_bool(const QDict *qdict, const char *key, bool def_value) { - QObject *obj; + QBool *qbool = qobject_to_qbool(qdict_get(qdict, key)); - obj = qdict_get(qdict, key); - if (!obj || qobject_type(obj) != QTYPE_QBOOL) - return def_value; - - return qbool_get_bool(qobject_to_qbool(obj)); + return qbool ? qbool_get_bool(qbool) : def_value; } /** @@ -335,13 +324,9 @@ bool qdict_get_try_bool(const QDict *qdict, const char *key, bool def_value) */ const char *qdict_get_try_str(const QDict *qdict, const char *key) { - QObject *obj; - - obj = qdict_get(qdict, key); - if (!obj || qobject_type(obj) != QTYPE_QSTRING) - return NULL; + QString *qstr = qobject_to_qstring(qdict_get(qdict, key)); - return qstring_get_str(qobject_to_qstring(obj)); + return qstr ? qstring_get_str(qstr) : NULL; } /** diff --git a/qobject/qfloat.c b/qobject/qfloat.c index 7de0992dba..c86516327f 100644 --- a/qobject/qfloat.c +++ b/qobject/qfloat.c @@ -51,9 +51,9 @@ double qfloat_get_double(const QFloat *qf) */ QFloat *qobject_to_qfloat(const QObject *obj) { - if (qobject_type(obj) != QTYPE_QFLOAT) + if (!obj || qobject_type(obj) != QTYPE_QFLOAT) { return NULL; - + } return container_of(obj, QFloat, base); } diff --git a/qobject/qint.c b/qobject/qint.c index 86b9b04f0b..999688e9ce 100644 --- a/qobject/qint.c +++ b/qobject/qint.c @@ -50,9 +50,9 @@ int64_t qint_get_int(const QInt *qi) */ QInt *qobject_to_qint(const QObject *obj) { - if (qobject_type(obj) != QTYPE_QINT) + if (!obj || qobject_type(obj) != QTYPE_QINT) { return NULL; - + } return container_of(obj, QInt, base); } diff --git a/qobject/qjson.c b/qobject/qjson.c index 33f8ef530c..a3e6a7ca42 100644 --- a/qobject/qjson.c +++ b/qobject/qjson.c @@ -28,7 +28,7 @@ typedef struct JSONParsingState QObject *result; } JSONParsingState; -static void parse_json(JSONMessageParser *parser, QList *tokens) +static void parse_json(JSONMessageParser *parser, GQueue *tokens) { JSONParsingState *s = container_of(parser, JSONParsingState, parser); s->result = json_parser_parse(tokens, s->ap); diff --git a/qobject/qlist.c b/qobject/qlist.c index 1ced0de58e..298003aaf7 100644 --- a/qobject/qlist.c +++ b/qobject/qlist.c @@ -142,10 +142,9 @@ size_t qlist_size(const QList *qlist) */ QList *qobject_to_qlist(const QObject *obj) { - if (qobject_type(obj) != QTYPE_QLIST) { + if (!obj || qobject_type(obj) != QTYPE_QLIST) { return NULL; } - return container_of(obj, QList, base); } diff --git a/qobject/qstring.c b/qobject/qstring.c index 607b7a142c..cb72dfbfc8 100644 --- a/qobject/qstring.c +++ b/qobject/qstring.c @@ -117,9 +117,9 @@ void qstring_append_chr(QString *qstring, int c) */ QString *qobject_to_qstring(const QObject *obj) { - if (qobject_type(obj) != QTYPE_QSTRING) + if (!obj || qobject_type(obj) != QTYPE_QSTRING) { return NULL; - + } return container_of(obj, QString, base); } diff --git a/qom/object.c b/qom/object.c index 11cd86b931..d7515697a3 100644 --- a/qom/object.c +++ b/qom/object.c @@ -67,6 +67,10 @@ struct TypeImpl InterfaceImpl interfaces[MAX_INTERFACES]; }; +struct ObjectPropertyIterator { + GHashTableIter iter; +}; + static Type type_interface; static GHashTable *type_table_get(void) @@ -204,7 +208,7 @@ static bool type_is_ancestor(TypeImpl *type, TypeImpl *target_type) { assert(target_type); - /* Check if typename is a direct ancestor of type */ + /* Check if target_type is a direct ancestor of type */ while (type) { if (type == target_type) { return true; @@ -261,7 +265,7 @@ static void type_initialize(TypeImpl *ti) GSList *e; int i; - g_assert(parent->class_size <= ti->class_size); + g_assert_cmpint(parent->class_size, <=, ti->class_size); memcpy(ti->class, parent->class, parent->class_size); ti->class->interfaces = NULL; @@ -326,6 +330,16 @@ static void object_post_init_with_type(Object *obj, TypeImpl *ti) } } +static void object_property_free(gpointer data) +{ + ObjectProperty *prop = data; + + g_free(prop->name); + g_free(prop->type); + g_free(prop->description); + g_free(prop); +} + void object_initialize_with_type(void *data, size_t size, TypeImpl *type) { Object *obj = data; @@ -333,14 +347,15 @@ void object_initialize_with_type(void *data, size_t size, TypeImpl *type) g_assert(type != NULL); type_initialize(type); - g_assert(type->instance_size >= sizeof(Object)); + g_assert_cmpint(type->instance_size, >=, sizeof(Object)); g_assert(type->abstract == false); - g_assert(size >= type->instance_size); + g_assert_cmpint(size, >=, type->instance_size); memset(obj, 0, type->instance_size); obj->class = type->class; object_ref(obj); - QTAILQ_INIT(&obj->properties); + obj->properties = g_hash_table_new_full(g_str_hash, g_str_equal, + NULL, object_property_free); object_init_with_type(obj, type); object_post_init_with_type(obj, type); } @@ -359,29 +374,51 @@ static inline bool object_property_is_child(ObjectProperty *prop) static void object_property_del_all(Object *obj) { - while (!QTAILQ_EMPTY(&obj->properties)) { - ObjectProperty *prop = QTAILQ_FIRST(&obj->properties); - - QTAILQ_REMOVE(&obj->properties, prop, node); - - if (prop->release) { - prop->release(obj, prop->name, prop->opaque); + ObjectProperty *prop; + GHashTableIter iter; + gpointer key, value; + bool released; + + do { + released = false; + g_hash_table_iter_init(&iter, obj->properties); + while (g_hash_table_iter_next(&iter, &key, &value)) { + prop = value; + if (prop->release) { + prop->release(obj, prop->name, prop->opaque); + prop->release = NULL; + released = true; + break; + } + g_hash_table_iter_remove(&iter); } + } while (released); - g_free(prop->name); - g_free(prop->type); - g_free(prop->description); - g_free(prop); - } + g_hash_table_unref(obj->properties); } static void object_property_del_child(Object *obj, Object *child, Error **errp) { ObjectProperty *prop; + GHashTableIter iter; + gpointer key, value; - QTAILQ_FOREACH(prop, &obj->properties, node) { + g_hash_table_iter_init(&iter, obj->properties); + while (g_hash_table_iter_next(&iter, &key, &value)) { + prop = value; + if (object_property_is_child(prop) && prop->opaque == child) { + if (prop->release) { + prop->release(obj, prop->name, prop->opaque); + prop->release = NULL; + } + break; + } + } + g_hash_table_iter_init(&iter, obj->properties); + while (g_hash_table_iter_next(&iter, &key, &value)) { + prop = value; if (object_property_is_child(prop) && prop->opaque == child) { - object_property_del(obj, prop->name, errp); + g_hash_table_iter_remove(&iter); break; } } @@ -413,7 +450,7 @@ static void object_finalize(void *data) object_property_del_all(obj); object_deinit(obj, ti); - g_assert(obj->ref == 0); + g_assert_cmpint(obj->ref, ==, 0); if (obj->free) { obj->free(obj); } @@ -779,10 +816,12 @@ static int do_object_child_foreach(Object *obj, int (*fn)(Object *child, void *opaque), void *opaque, bool recurse) { - ObjectProperty *prop, *next; + GHashTableIter iter; + ObjectProperty *prop; int ret = 0; - QTAILQ_FOREACH_SAFE(prop, &obj->properties, node, next) { + g_hash_table_iter_init(&iter, obj->properties); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&prop)) { if (object_property_is_child(prop)) { Object *child = prop->opaque; @@ -833,7 +872,7 @@ void object_ref(Object *obj) if (!obj) { return; } - atomic_inc(&obj->ref); + atomic_inc(&obj->ref); } void object_unref(Object *obj) @@ -841,7 +880,7 @@ void object_unref(Object *obj) if (!obj) { return; } - g_assert(obj->ref > 0); + g_assert_cmpint(obj->ref, >, 0); /* parent always holds a reference to its children */ if (atomic_fetch_dec(&obj->ref) == 1) { @@ -879,13 +918,11 @@ object_property_add(Object *obj, const char *name, const char *type, return ret; } - QTAILQ_FOREACH(prop, &obj->properties, node) { - if (strcmp(prop->name, name) == 0) { - error_setg(errp, "attempt to add duplicate property '%s'" + if (g_hash_table_lookup(obj->properties, name) != NULL) { + error_setg(errp, "attempt to add duplicate property '%s'" " to object (type '%s')", name, object_get_typename(obj)); - return NULL; - } + return NULL; } prop = g_malloc0(sizeof(*prop)); @@ -898,7 +935,7 @@ object_property_add(Object *obj, const char *name, const char *type, prop->release = release; prop->opaque = opaque; - QTAILQ_INSERT_TAIL(&obj->properties, prop, node); + g_hash_table_insert(obj->properties, prop->name, prop); return prop; } @@ -907,33 +944,52 @@ ObjectProperty *object_property_find(Object *obj, const char *name, { ObjectProperty *prop; - QTAILQ_FOREACH(prop, &obj->properties, node) { - if (strcmp(prop->name, name) == 0) { - return prop; - } + prop = g_hash_table_lookup(obj->properties, name); + if (prop) { + return prop; } error_setg(errp, "Property '.%s' not found", name); return NULL; } +ObjectPropertyIterator *object_property_iter_init(Object *obj) +{ + ObjectPropertyIterator *ret = g_new0(ObjectPropertyIterator, 1); + g_hash_table_iter_init(&ret->iter, obj->properties); + return ret; +} + +void object_property_iter_free(ObjectPropertyIterator *iter) +{ + if (!iter) { + return; + } + g_free(iter); +} + +ObjectProperty *object_property_iter_next(ObjectPropertyIterator *iter) +{ + gpointer key, val; + if (!g_hash_table_iter_next(&iter->iter, &key, &val)) { + return NULL; + } + return val; +} + void object_property_del(Object *obj, const char *name, Error **errp) { - ObjectProperty *prop = object_property_find(obj, name, errp); - if (prop == NULL) { + ObjectProperty *prop = g_hash_table_lookup(obj->properties, name); + + if (!prop) { + error_setg(errp, "Property '.%s' not found", name); return; } if (prop->release) { prop->release(obj, name, prop->opaque); } - - QTAILQ_REMOVE(&obj->properties, prop, node); - - g_free(prop->name); - g_free(prop->type); - g_free(prop->description); - g_free(prop); + g_hash_table_remove(obj->properties, name); } void object_property_get(Object *obj, Visitor *v, const char *name, @@ -1330,8 +1386,8 @@ static Object *object_resolve_link(Object *obj, const char *name, target = object_resolve_path_type(path, target_type, &ambiguous); if (ambiguous) { - error_set(errp, ERROR_CLASS_GENERIC_ERROR, - "Path '%s' does not uniquely identify an object", path); + error_setg(errp, "Path '%s' does not uniquely identify an object", + path); } else if (!target) { target = object_resolve_path(path, &ambiguous); if (target || ambiguous) { @@ -1453,11 +1509,13 @@ void object_property_add_const_link(Object *obj, const char *name, gchar *object_get_canonical_path_component(Object *obj) { ObjectProperty *prop = NULL; + GHashTableIter iter; g_assert(obj); g_assert(obj->parent != NULL); - QTAILQ_FOREACH(prop, &obj->parent->properties, node) { + g_hash_table_iter_init(&iter, obj->parent->properties); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&prop)) { if (!object_property_is_child(prop)) { continue; } @@ -1541,11 +1599,13 @@ static Object *object_resolve_partial_path(Object *parent, bool *ambiguous) { Object *obj; + GHashTableIter iter; ObjectProperty *prop; obj = object_resolve_abs_path(parent, parts, typename, 0); - QTAILQ_FOREACH(prop, &parent->properties, node) { + g_hash_table_iter_init(&iter, parent->properties); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&prop)) { Object *found; if (!object_property_is_child(prop)) { @@ -657,7 +657,6 @@ void qtest_init(const char *qtest_chrdev, const char *qtest_log, Error **errp) inbuf = g_string_new(""); qtest_chr = chr; - page_size_init(); } bool qtest_driver(void) diff --git a/replay/Makefile.objs b/replay/Makefile.objs new file mode 100644 index 0000000000..232193a24b --- /dev/null +++ b/replay/Makefile.objs @@ -0,0 +1,5 @@ +common-obj-y += replay.o +common-obj-y += replay-internal.o +common-obj-y += replay-events.o +common-obj-y += replay-time.o +common-obj-y += replay-input.o diff --git a/replay/replay-events.c b/replay/replay-events.c new file mode 100644 index 0000000000..402f644067 --- /dev/null +++ b/replay/replay-events.c @@ -0,0 +1,279 @@ +/* + * replay-events.c + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "block/aio.h" +#include "ui/input.h" + +typedef struct Event { + ReplayAsyncEventKind event_kind; + void *opaque; + void *opaque2; + uint64_t id; + + QTAILQ_ENTRY(Event) events; +} Event; + +static QTAILQ_HEAD(, Event) events_list = QTAILQ_HEAD_INITIALIZER(events_list); +static unsigned int read_event_kind = -1; +static uint64_t read_id = -1; +static int read_checkpoint = -1; + +static bool events_enabled; + +/* Functions */ + +static void replay_run_event(Event *event) +{ + switch (event->event_kind) { + case REPLAY_ASYNC_EVENT_BH: + aio_bh_call(event->opaque); + break; + case REPLAY_ASYNC_EVENT_INPUT: + qemu_input_event_send_impl(NULL, (InputEvent *)event->opaque); + qapi_free_InputEvent((InputEvent *)event->opaque); + break; + case REPLAY_ASYNC_EVENT_INPUT_SYNC: + qemu_input_event_sync_impl(); + break; + default: + error_report("Replay: invalid async event ID (%d) in the queue", + event->event_kind); + exit(1); + break; + } +} + +void replay_enable_events(void) +{ + events_enabled = true; +} + +bool replay_has_events(void) +{ + return !QTAILQ_EMPTY(&events_list); +} + +void replay_flush_events(void) +{ + replay_mutex_lock(); + while (!QTAILQ_EMPTY(&events_list)) { + Event *event = QTAILQ_FIRST(&events_list); + replay_mutex_unlock(); + replay_run_event(event); + replay_mutex_lock(); + QTAILQ_REMOVE(&events_list, event, events); + g_free(event); + } + replay_mutex_unlock(); +} + +void replay_disable_events(void) +{ + if (replay_mode != REPLAY_MODE_NONE) { + events_enabled = false; + /* Flush events queue before waiting of completion */ + replay_flush_events(); + } +} + +void replay_clear_events(void) +{ + replay_mutex_lock(); + while (!QTAILQ_EMPTY(&events_list)) { + Event *event = QTAILQ_FIRST(&events_list); + QTAILQ_REMOVE(&events_list, event, events); + + g_free(event); + } + replay_mutex_unlock(); +} + +/*! Adds specified async event to the queue */ +static void replay_add_event(ReplayAsyncEventKind event_kind, + void *opaque, + void *opaque2, uint64_t id) +{ + assert(event_kind < REPLAY_ASYNC_COUNT); + + if (!replay_file || replay_mode == REPLAY_MODE_NONE + || !events_enabled) { + Event e; + e.event_kind = event_kind; + e.opaque = opaque; + e.opaque2 = opaque2; + e.id = id; + replay_run_event(&e); + return; + } + + Event *event = g_malloc0(sizeof(Event)); + event->event_kind = event_kind; + event->opaque = opaque; + event->opaque2 = opaque2; + event->id = id; + + replay_mutex_lock(); + QTAILQ_INSERT_TAIL(&events_list, event, events); + replay_mutex_unlock(); +} + +void replay_bh_schedule_event(QEMUBH *bh) +{ + if (replay_mode != REPLAY_MODE_NONE) { + uint64_t id = replay_get_current_step(); + replay_add_event(REPLAY_ASYNC_EVENT_BH, bh, NULL, id); + } else { + qemu_bh_schedule(bh); + } +} + +void replay_add_input_event(struct InputEvent *event) +{ + replay_add_event(REPLAY_ASYNC_EVENT_INPUT, event, NULL, 0); +} + +void replay_add_input_sync_event(void) +{ + replay_add_event(REPLAY_ASYNC_EVENT_INPUT_SYNC, NULL, NULL, 0); +} + +static void replay_save_event(Event *event, int checkpoint) +{ + if (replay_mode != REPLAY_MODE_PLAY) { + /* put the event into the file */ + replay_put_event(EVENT_ASYNC); + replay_put_byte(checkpoint); + replay_put_byte(event->event_kind); + + /* save event-specific data */ + switch (event->event_kind) { + case REPLAY_ASYNC_EVENT_BH: + replay_put_qword(event->id); + break; + case REPLAY_ASYNC_EVENT_INPUT: + replay_save_input_event(event->opaque); + break; + case REPLAY_ASYNC_EVENT_INPUT_SYNC: + break; + default: + error_report("Unknown ID %d of replay event", read_event_kind); + exit(1); + } + } +} + +/* Called with replay mutex locked */ +void replay_save_events(int checkpoint) +{ + while (!QTAILQ_EMPTY(&events_list)) { + Event *event = QTAILQ_FIRST(&events_list); + replay_save_event(event, checkpoint); + + replay_mutex_unlock(); + replay_run_event(event); + replay_mutex_lock(); + QTAILQ_REMOVE(&events_list, event, events); + g_free(event); + } +} + +static Event *replay_read_event(int checkpoint) +{ + Event *event; + if (read_event_kind == -1) { + read_checkpoint = replay_get_byte(); + read_event_kind = replay_get_byte(); + read_id = -1; + replay_check_error(); + } + + if (checkpoint != read_checkpoint) { + return NULL; + } + + /* Events that has not to be in the queue */ + switch (read_event_kind) { + case REPLAY_ASYNC_EVENT_BH: + if (read_id == -1) { + read_id = replay_get_qword(); + } + break; + case REPLAY_ASYNC_EVENT_INPUT: + event = g_malloc0(sizeof(Event)); + event->event_kind = read_event_kind; + event->opaque = replay_read_input_event(); + return event; + case REPLAY_ASYNC_EVENT_INPUT_SYNC: + event = g_malloc0(sizeof(Event)); + event->event_kind = read_event_kind; + event->opaque = 0; + return event; + default: + error_report("Unknown ID %d of replay event", read_event_kind); + exit(1); + break; + } + + QTAILQ_FOREACH(event, &events_list, events) { + if (event->event_kind == read_event_kind + && (read_id == -1 || read_id == event->id)) { + break; + } + } + + if (event) { + QTAILQ_REMOVE(&events_list, event, events); + } else { + return NULL; + } + + /* Read event-specific data */ + + return event; +} + +/* Called with replay mutex locked */ +void replay_read_events(int checkpoint) +{ + while (replay_data_kind == EVENT_ASYNC) { + Event *event = replay_read_event(checkpoint); + if (!event) { + break; + } + replay_mutex_unlock(); + replay_run_event(event); + replay_mutex_lock(); + + g_free(event); + replay_finish_event(); + read_event_kind = -1; + } +} + +void replay_init_events(void) +{ + read_event_kind = -1; +} + +void replay_finish_events(void) +{ + events_enabled = false; + replay_clear_events(); +} + +bool replay_events_enabled(void) +{ + return events_enabled; +} diff --git a/replay/replay-input.c b/replay/replay-input.c new file mode 100644 index 0000000000..98798955a1 --- /dev/null +++ b/replay/replay-input.c @@ -0,0 +1,160 @@ +/* + * replay-input.c + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "qemu/notify.h" +#include "ui/input.h" +#include "qapi/qmp-output-visitor.h" +#include "qapi/qmp-input-visitor.h" +#include "qapi-visit.h" + +static InputEvent *qapi_clone_InputEvent(InputEvent *src) +{ + QmpOutputVisitor *qov; + QmpInputVisitor *qiv; + Visitor *ov, *iv; + QObject *obj; + InputEvent *dst = NULL; + + qov = qmp_output_visitor_new(); + ov = qmp_output_get_visitor(qov); + visit_type_InputEvent(ov, &src, NULL, &error_abort); + obj = qmp_output_get_qobject(qov); + qmp_output_visitor_cleanup(qov); + if (!obj) { + return NULL; + } + + qiv = qmp_input_visitor_new(obj); + iv = qmp_input_get_visitor(qiv); + visit_type_InputEvent(iv, &dst, NULL, &error_abort); + qmp_input_visitor_cleanup(qiv); + qobject_decref(obj); + + return dst; +} + +void replay_save_input_event(InputEvent *evt) +{ + replay_put_dword(evt->type); + + switch (evt->type) { + case INPUT_EVENT_KIND_KEY: + replay_put_dword(evt->u.key->key->type); + + switch (evt->u.key->key->type) { + case KEY_VALUE_KIND_NUMBER: + replay_put_qword(evt->u.key->key->u.number); + replay_put_byte(evt->u.key->down); + break; + case KEY_VALUE_KIND_QCODE: + replay_put_dword(evt->u.key->key->u.qcode); + replay_put_byte(evt->u.key->down); + break; + case KEY_VALUE_KIND_MAX: + /* keep gcc happy */ + break; + } + break; + case INPUT_EVENT_KIND_BTN: + replay_put_dword(evt->u.btn->button); + replay_put_byte(evt->u.btn->down); + break; + case INPUT_EVENT_KIND_REL: + replay_put_dword(evt->u.rel->axis); + replay_put_qword(evt->u.rel->value); + break; + case INPUT_EVENT_KIND_ABS: + replay_put_dword(evt->u.abs->axis); + replay_put_qword(evt->u.abs->value); + break; + case INPUT_EVENT_KIND_MAX: + /* keep gcc happy */ + break; + } +} + +InputEvent *replay_read_input_event(void) +{ + InputEvent evt; + KeyValue keyValue; + InputKeyEvent key; + key.key = &keyValue; + InputBtnEvent btn; + InputMoveEvent rel; + InputMoveEvent abs; + + evt.type = replay_get_dword(); + switch (evt.type) { + case INPUT_EVENT_KIND_KEY: + evt.u.key = &key; + evt.u.key->key->type = replay_get_dword(); + + switch (evt.u.key->key->type) { + case KEY_VALUE_KIND_NUMBER: + evt.u.key->key->u.number = replay_get_qword(); + evt.u.key->down = replay_get_byte(); + break; + case KEY_VALUE_KIND_QCODE: + evt.u.key->key->u.qcode = (QKeyCode)replay_get_dword(); + evt.u.key->down = replay_get_byte(); + break; + case KEY_VALUE_KIND_MAX: + /* keep gcc happy */ + break; + } + break; + case INPUT_EVENT_KIND_BTN: + evt.u.btn = &btn; + evt.u.btn->button = (InputButton)replay_get_dword(); + evt.u.btn->down = replay_get_byte(); + break; + case INPUT_EVENT_KIND_REL: + evt.u.rel = &rel; + evt.u.rel->axis = (InputAxis)replay_get_dword(); + evt.u.rel->value = replay_get_qword(); + break; + case INPUT_EVENT_KIND_ABS: + evt.u.abs = &abs; + evt.u.abs->axis = (InputAxis)replay_get_dword(); + evt.u.abs->value = replay_get_qword(); + break; + case INPUT_EVENT_KIND_MAX: + /* keep gcc happy */ + break; + } + + return qapi_clone_InputEvent(&evt); +} + +void replay_input_event(QemuConsole *src, InputEvent *evt) +{ + if (replay_mode == REPLAY_MODE_PLAY) { + /* Nothing */ + } else if (replay_mode == REPLAY_MODE_RECORD) { + replay_add_input_event(qapi_clone_InputEvent(evt)); + } else { + qemu_input_event_send_impl(src, evt); + } +} + +void replay_input_sync_event(void) +{ + if (replay_mode == REPLAY_MODE_PLAY) { + /* Nothing */ + } else if (replay_mode == REPLAY_MODE_RECORD) { + replay_add_input_sync_event(); + } else { + qemu_input_event_sync_impl(); + } +} diff --git a/replay/replay-internal.c b/replay/replay-internal.c new file mode 100644 index 0000000000..35cff44a36 --- /dev/null +++ b/replay/replay-internal.c @@ -0,0 +1,206 @@ +/* + * replay-internal.c + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "qemu/error-report.h" +#include "sysemu/sysemu.h" + +unsigned int replay_data_kind = -1; +static unsigned int replay_has_unread_data; + +/* Mutex to protect reading and writing events to the log. + replay_data_kind and replay_has_unread_data are also protected + by this mutex. + It also protects replay events queue which stores events to be + written or read to the log. */ +static QemuMutex lock; + +/* File for replay writing */ +FILE *replay_file; + +void replay_put_byte(uint8_t byte) +{ + if (replay_file) { + putc(byte, replay_file); + } +} + +void replay_put_event(uint8_t event) +{ + assert(event < EVENT_COUNT); + replay_put_byte(event); +} + + +void replay_put_word(uint16_t word) +{ + replay_put_byte(word >> 8); + replay_put_byte(word); +} + +void replay_put_dword(uint32_t dword) +{ + replay_put_word(dword >> 16); + replay_put_word(dword); +} + +void replay_put_qword(int64_t qword) +{ + replay_put_dword(qword >> 32); + replay_put_dword(qword); +} + +void replay_put_array(const uint8_t *buf, size_t size) +{ + if (replay_file) { + replay_put_dword(size); + fwrite(buf, 1, size, replay_file); + } +} + +uint8_t replay_get_byte(void) +{ + uint8_t byte = 0; + if (replay_file) { + byte = getc(replay_file); + } + return byte; +} + +uint16_t replay_get_word(void) +{ + uint16_t word = 0; + if (replay_file) { + word = replay_get_byte(); + word = (word << 8) + replay_get_byte(); + } + + return word; +} + +uint32_t replay_get_dword(void) +{ + uint32_t dword = 0; + if (replay_file) { + dword = replay_get_word(); + dword = (dword << 16) + replay_get_word(); + } + + return dword; +} + +int64_t replay_get_qword(void) +{ + int64_t qword = 0; + if (replay_file) { + qword = replay_get_dword(); + qword = (qword << 32) + replay_get_dword(); + } + + return qword; +} + +void replay_get_array(uint8_t *buf, size_t *size) +{ + if (replay_file) { + *size = replay_get_dword(); + if (fread(buf, 1, *size, replay_file) != *size) { + error_report("replay read error"); + } + } +} + +void replay_get_array_alloc(uint8_t **buf, size_t *size) +{ + if (replay_file) { + *size = replay_get_dword(); + *buf = g_malloc(*size); + if (fread(*buf, 1, *size, replay_file) != *size) { + error_report("replay read error"); + } + } +} + +void replay_check_error(void) +{ + if (replay_file) { + if (feof(replay_file)) { + error_report("replay file is over"); + qemu_system_vmstop_request_prepare(); + qemu_system_vmstop_request(RUN_STATE_PAUSED); + } else if (ferror(replay_file)) { + error_report("replay file is over or something goes wrong"); + qemu_system_vmstop_request_prepare(); + qemu_system_vmstop_request(RUN_STATE_INTERNAL_ERROR); + } + } +} + +void replay_fetch_data_kind(void) +{ + if (replay_file) { + if (!replay_has_unread_data) { + replay_data_kind = replay_get_byte(); + if (replay_data_kind == EVENT_INSTRUCTION) { + replay_state.instructions_count = replay_get_dword(); + } + replay_check_error(); + replay_has_unread_data = 1; + if (replay_data_kind >= EVENT_COUNT) { + error_report("Replay: unknown event kind %d", replay_data_kind); + exit(1); + } + } + } +} + +void replay_finish_event(void) +{ + replay_has_unread_data = 0; + replay_fetch_data_kind(); +} + +void replay_mutex_init(void) +{ + qemu_mutex_init(&lock); +} + +void replay_mutex_destroy(void) +{ + qemu_mutex_destroy(&lock); +} + +void replay_mutex_lock(void) +{ + qemu_mutex_lock(&lock); +} + +void replay_mutex_unlock(void) +{ + qemu_mutex_unlock(&lock); +} + +/*! Saves cached instructions. */ +void replay_save_instructions(void) +{ + if (replay_file && replay_mode == REPLAY_MODE_RECORD) { + replay_mutex_lock(); + int diff = (int)(replay_get_current_step() - replay_state.current_step); + if (diff > 0) { + replay_put_event(EVENT_INSTRUCTION); + replay_put_dword(diff); + replay_state.current_step += diff; + } + replay_mutex_unlock(); + } +} diff --git a/replay/replay-internal.h b/replay/replay-internal.h new file mode 100644 index 0000000000..77e0d292c7 --- /dev/null +++ b/replay/replay-internal.h @@ -0,0 +1,140 @@ +#ifndef REPLAY_INTERNAL_H +#define REPLAY_INTERNAL_H + +/* + * replay-internal.h + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include <stdio.h> + +enum ReplayEvents { + /* for instruction event */ + EVENT_INSTRUCTION, + /* for software interrupt */ + EVENT_INTERRUPT, + /* for emulated exceptions */ + EVENT_EXCEPTION, + /* for async events */ + EVENT_ASYNC, + /* for shutdown request */ + EVENT_SHUTDOWN, + /* for clock read/writes */ + /* some of greater codes are reserved for clocks */ + EVENT_CLOCK, + EVENT_CLOCK_LAST = EVENT_CLOCK + REPLAY_CLOCK_COUNT - 1, + /* for checkpoint event */ + /* some of greater codes are reserved for checkpoints */ + EVENT_CHECKPOINT, + EVENT_CHECKPOINT_LAST = EVENT_CHECKPOINT + CHECKPOINT_COUNT - 1, + /* end of log event */ + EVENT_END, + EVENT_COUNT +}; + +/* Asynchronous events IDs */ + +enum ReplayAsyncEventKind { + REPLAY_ASYNC_EVENT_BH, + REPLAY_ASYNC_EVENT_INPUT, + REPLAY_ASYNC_EVENT_INPUT_SYNC, + REPLAY_ASYNC_COUNT +}; + +typedef enum ReplayAsyncEventKind ReplayAsyncEventKind; + +typedef struct ReplayState { + /*! Cached clock values. */ + int64_t cached_clock[REPLAY_CLOCK_COUNT]; + /*! Current step - number of processed instructions and timer events. */ + uint64_t current_step; + /*! Number of instructions to be executed before other events happen. */ + int instructions_count; +} ReplayState; +extern ReplayState replay_state; + +extern unsigned int replay_data_kind; + +/* File for replay writing */ +extern FILE *replay_file; + +void replay_put_byte(uint8_t byte); +void replay_put_event(uint8_t event); +void replay_put_word(uint16_t word); +void replay_put_dword(uint32_t dword); +void replay_put_qword(int64_t qword); +void replay_put_array(const uint8_t *buf, size_t size); + +uint8_t replay_get_byte(void); +uint16_t replay_get_word(void); +uint32_t replay_get_dword(void); +int64_t replay_get_qword(void); +void replay_get_array(uint8_t *buf, size_t *size); +void replay_get_array_alloc(uint8_t **buf, size_t *size); + +/* Mutex functions for protecting replay log file */ + +void replay_mutex_init(void); +void replay_mutex_destroy(void); +void replay_mutex_lock(void); +void replay_mutex_unlock(void); + +/*! Checks error status of the file. */ +void replay_check_error(void); + +/*! Finishes processing of the replayed event and fetches + the next event from the log. */ +void replay_finish_event(void); +/*! Reads data type from the file and stores it in the + replay_data_kind variable. */ +void replay_fetch_data_kind(void); + +/*! Saves queued events (like instructions and sound). */ +void replay_save_instructions(void); + +/*! Skips async events until some sync event will be found. + \return true, if event was found */ +bool replay_next_event_is(int event); + +/*! Reads next clock value from the file. + If clock kind read from the file is different from the parameter, + the value is not used. */ +void replay_read_next_clock(unsigned int kind); + +/* Asynchronous events queue */ + +/*! Initializes events' processing internals */ +void replay_init_events(void); +/*! Clears internal data structures for events handling */ +void replay_finish_events(void); +/*! Enables storing events in the queue */ +void replay_enable_events(void); +/*! Flushes events queue */ +void replay_flush_events(void); +/*! Clears events list before loading new VM state */ +void replay_clear_events(void); +/*! Returns true if there are any unsaved events in the queue */ +bool replay_has_events(void); +/*! Saves events from queue into the file */ +void replay_save_events(int checkpoint); +/*! Read events from the file into the input queue */ +void replay_read_events(int checkpoint); + +/* Input events */ + +/*! Saves input event to the log */ +void replay_save_input_event(InputEvent *evt); +/*! Reads input event from the log */ +InputEvent *replay_read_input_event(void); +/*! Adds input event to the queue */ +void replay_add_input_event(struct InputEvent *event); +/*! Adds input sync event to the queue */ +void replay_add_input_sync_event(void); + +#endif diff --git a/replay/replay-time.c b/replay/replay-time.c new file mode 100644 index 0000000000..6d06951f5e --- /dev/null +++ b/replay/replay-time.c @@ -0,0 +1,64 @@ +/* + * replay-time.c + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "qemu/error-report.h" + +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock) +{ + replay_save_instructions(); + + if (replay_file) { + replay_mutex_lock(); + replay_put_event(EVENT_CLOCK + kind); + replay_put_qword(clock); + replay_mutex_unlock(); + } + + return clock; +} + +void replay_read_next_clock(ReplayClockKind kind) +{ + unsigned int read_kind = replay_data_kind - EVENT_CLOCK; + + assert(read_kind == kind); + + int64_t clock = replay_get_qword(); + + replay_check_error(); + replay_finish_event(); + + replay_state.cached_clock[read_kind] = clock; +} + +/*! Reads next clock event from the input. */ +int64_t replay_read_clock(ReplayClockKind kind) +{ + replay_account_executed_instructions(); + + if (replay_file) { + int64_t ret; + replay_mutex_lock(); + if (replay_next_event_is(EVENT_CLOCK + kind)) { + replay_read_next_clock(kind); + } + ret = replay_state.cached_clock[kind]; + replay_mutex_unlock(); + + return ret; + } + + error_report("REPLAY INTERNAL ERROR %d", __LINE__); + exit(1); +} diff --git a/replay/replay.c b/replay/replay.c new file mode 100644 index 0000000000..0d33e82c95 --- /dev/null +++ b/replay/replay.c @@ -0,0 +1,342 @@ +/* + * replay.c + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "qemu/timer.h" +#include "qemu/main-loop.h" +#include "sysemu/sysemu.h" +#include "qemu/error-report.h" + +/* Current version of the replay mechanism. + Increase it when file format changes. */ +#define REPLAY_VERSION 0xe02002 +/* Size of replay log header */ +#define HEADER_SIZE (sizeof(uint32_t) + sizeof(uint64_t)) + +ReplayMode replay_mode = REPLAY_MODE_NONE; + +/* Name of replay file */ +static char *replay_filename; +ReplayState replay_state; +static GSList *replay_blockers; + +bool replay_next_event_is(int event) +{ + bool res = false; + + /* nothing to skip - not all instructions used */ + if (replay_state.instructions_count != 0) { + assert(replay_data_kind == EVENT_INSTRUCTION); + return event == EVENT_INSTRUCTION; + } + + while (true) { + if (event == replay_data_kind) { + res = true; + } + switch (replay_data_kind) { + case EVENT_SHUTDOWN: + replay_finish_event(); + qemu_system_shutdown_request(); + break; + default: + /* clock, time_t, checkpoint and other events */ + return res; + } + } + return res; +} + +uint64_t replay_get_current_step(void) +{ + return cpu_get_icount_raw(); +} + +int replay_get_instructions(void) +{ + int res = 0; + replay_mutex_lock(); + if (replay_next_event_is(EVENT_INSTRUCTION)) { + res = replay_state.instructions_count; + } + replay_mutex_unlock(); + return res; +} + +void replay_account_executed_instructions(void) +{ + if (replay_mode == REPLAY_MODE_PLAY) { + replay_mutex_lock(); + if (replay_state.instructions_count > 0) { + int count = (int)(replay_get_current_step() + - replay_state.current_step); + replay_state.instructions_count -= count; + replay_state.current_step += count; + if (replay_state.instructions_count == 0) { + assert(replay_data_kind == EVENT_INSTRUCTION); + replay_finish_event(); + /* Wake up iothread. This is required because + timers will not expire until clock counters + will be read from the log. */ + qemu_notify_event(); + } + } + replay_mutex_unlock(); + } +} + +bool replay_exception(void) +{ + if (replay_mode == REPLAY_MODE_RECORD) { + replay_save_instructions(); + replay_mutex_lock(); + replay_put_event(EVENT_EXCEPTION); + replay_mutex_unlock(); + return true; + } else if (replay_mode == REPLAY_MODE_PLAY) { + bool res = replay_has_exception(); + if (res) { + replay_mutex_lock(); + replay_finish_event(); + replay_mutex_unlock(); + } + return res; + } + + return true; +} + +bool replay_has_exception(void) +{ + bool res = false; + if (replay_mode == REPLAY_MODE_PLAY) { + replay_account_executed_instructions(); + replay_mutex_lock(); + res = replay_next_event_is(EVENT_EXCEPTION); + replay_mutex_unlock(); + } + + return res; +} + +bool replay_interrupt(void) +{ + if (replay_mode == REPLAY_MODE_RECORD) { + replay_save_instructions(); + replay_mutex_lock(); + replay_put_event(EVENT_INTERRUPT); + replay_mutex_unlock(); + return true; + } else if (replay_mode == REPLAY_MODE_PLAY) { + bool res = replay_has_interrupt(); + if (res) { + replay_mutex_lock(); + replay_finish_event(); + replay_mutex_unlock(); + } + return res; + } + + return true; +} + +bool replay_has_interrupt(void) +{ + bool res = false; + if (replay_mode == REPLAY_MODE_PLAY) { + replay_account_executed_instructions(); + replay_mutex_lock(); + res = replay_next_event_is(EVENT_INTERRUPT); + replay_mutex_unlock(); + } + return res; +} + +void replay_shutdown_request(void) +{ + if (replay_mode == REPLAY_MODE_RECORD) { + replay_mutex_lock(); + replay_put_event(EVENT_SHUTDOWN); + replay_mutex_unlock(); + } +} + +bool replay_checkpoint(ReplayCheckpoint checkpoint) +{ + bool res = false; + assert(EVENT_CHECKPOINT + checkpoint <= EVENT_CHECKPOINT_LAST); + replay_save_instructions(); + + if (!replay_file) { + return true; + } + + replay_mutex_lock(); + + if (replay_mode == REPLAY_MODE_PLAY) { + if (replay_next_event_is(EVENT_CHECKPOINT + checkpoint)) { + replay_finish_event(); + } else if (replay_data_kind != EVENT_ASYNC) { + res = false; + goto out; + } + replay_read_events(checkpoint); + /* replay_read_events may leave some unread events. + Return false if not all of the events associated with + checkpoint were processed */ + res = replay_data_kind != EVENT_ASYNC; + } else if (replay_mode == REPLAY_MODE_RECORD) { + replay_put_event(EVENT_CHECKPOINT + checkpoint); + replay_save_events(checkpoint); + res = true; + } +out: + replay_mutex_unlock(); + return res; +} + +static void replay_enable(const char *fname, int mode) +{ + const char *fmode = NULL; + assert(!replay_file); + + switch (mode) { + case REPLAY_MODE_RECORD: + fmode = "wb"; + break; + case REPLAY_MODE_PLAY: + fmode = "rb"; + break; + default: + fprintf(stderr, "Replay: internal error: invalid replay mode\n"); + exit(1); + } + + atexit(replay_finish); + + replay_mutex_init(); + + replay_file = fopen(fname, fmode); + if (replay_file == NULL) { + fprintf(stderr, "Replay: open %s: %s\n", fname, strerror(errno)); + exit(1); + } + + replay_filename = g_strdup(fname); + + replay_mode = mode; + replay_data_kind = -1; + replay_state.instructions_count = 0; + replay_state.current_step = 0; + + /* skip file header for RECORD and check it for PLAY */ + if (replay_mode == REPLAY_MODE_RECORD) { + fseek(replay_file, HEADER_SIZE, SEEK_SET); + } else if (replay_mode == REPLAY_MODE_PLAY) { + unsigned int version = replay_get_dword(); + if (version != REPLAY_VERSION) { + fprintf(stderr, "Replay: invalid input log file version\n"); + exit(1); + } + /* go to the beginning */ + fseek(replay_file, HEADER_SIZE, SEEK_SET); + replay_fetch_data_kind(); + } + + replay_init_events(); +} + +void replay_configure(QemuOpts *opts) +{ + const char *fname; + const char *rr; + ReplayMode mode = REPLAY_MODE_NONE; + + rr = qemu_opt_get(opts, "rr"); + if (!rr) { + /* Just enabling icount */ + return; + } else if (!strcmp(rr, "record")) { + mode = REPLAY_MODE_RECORD; + } else if (!strcmp(rr, "replay")) { + mode = REPLAY_MODE_PLAY; + } else { + error_report("Invalid icount rr option: %s", rr); + exit(1); + } + + fname = qemu_opt_get(opts, "rrfile"); + if (!fname) { + error_report("File name not specified for replay"); + exit(1); + } + + replay_enable(fname, mode); +} + +void replay_start(void) +{ + if (replay_mode == REPLAY_MODE_NONE) { + return; + } + + if (replay_blockers) { + error_report("Record/replay: %s", + error_get_pretty(replay_blockers->data)); + exit(1); + } + if (!use_icount) { + error_report("Please enable icount to use record/replay"); + exit(1); + } + + /* Timer for snapshotting will be set up here. */ + + replay_enable_events(); +} + +void replay_finish(void) +{ + if (replay_mode == REPLAY_MODE_NONE) { + return; + } + + replay_save_instructions(); + + /* finalize the file */ + if (replay_file) { + if (replay_mode == REPLAY_MODE_RECORD) { + /* write end event */ + replay_put_event(EVENT_END); + + /* write header */ + fseek(replay_file, 0, SEEK_SET); + replay_put_dword(REPLAY_VERSION); + } + + fclose(replay_file); + replay_file = NULL; + } + if (replay_filename) { + g_free(replay_filename); + replay_filename = NULL; + } + + replay_finish_events(); + replay_mutex_destroy(); +} + +void replay_add_blocker(Error *reason) +{ + replay_blockers = g_slist_prepend(replay_blockers, reason); +} diff --git a/roms/SLOF b/roms/SLOF -Subproject 811277ac91f674a9273e2b529791e9b75350f3e +Subproject b4c93802a5b2c72f096649c497ec9ff5708e445 diff --git a/roms/openbios b/roms/openbios -Subproject 18f02b14de795c1aab4fe23c1810bfd0944da6a +Subproject 3caee1794ac3f742315823d8447d21f33ce019e diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py index f6894bece9..14553876a2 100755 --- a/scripts/analyze-migration.py +++ b/scripts/analyze-migration.py @@ -252,6 +252,15 @@ class HTABSection(object): def getDict(self): return "" + +class ConfigurationSection(object): + def __init__(self, file): + self.file = file + + def read(self): + name_len = self.file.read32() + name = self.file.readstr(len = name_len) + class VMSDFieldGeneric(object): def __init__(self, desc, file): self.file = file @@ -474,6 +483,7 @@ class MigrationDump(object): QEMU_VM_SECTION_FULL = 0x04 QEMU_VM_SUBSECTION = 0x05 QEMU_VM_VMDESCRIPTION = 0x06 + QEMU_VM_CONFIGURATION = 0x07 QEMU_VM_SECTION_FOOTER= 0x7e def __init__(self, filename): @@ -514,6 +524,9 @@ class MigrationDump(object): section_type = file.read8() if section_type == self.QEMU_VM_EOF: break + elif section_type == self.QEMU_VM_CONFIGURATION: + section = ConfigurationSection(file) + section.read() elif section_type == self.QEMU_VM_SECTION_START or section_type == self.QEMU_VM_SECTION_FULL: section_id = file.read32() name = file.readstr() diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py index 43a893b4eb..561e47a42b 100644 --- a/scripts/qapi-commands.py +++ b/scripts/qapi-commands.py @@ -175,7 +175,9 @@ def gen_marshal(name, arg_type, ret_type): ret += gen_marshal_input_visit(arg_type) ret += gen_call(name, arg_type, ret_type) - if re.search('^ *goto out;', ret, re.MULTILINE): + # 'goto out' produced by gen_marshal_input_visit->gen_visit_fields() + # for each arg_type member, and by gen_call() for ret_type + if (arg_type and arg_type.members) or ret_type: ret += mcgen(''' out: diff --git a/scripts/qapi-introspect.py b/scripts/qapi-introspect.py index c0dad6679c..64f2cd0631 100644 --- a/scripts/qapi-introspect.py +++ b/scripts/qapi-introspect.py @@ -107,10 +107,12 @@ const char %(c_name)s[] = %(c_string)s; # characters. if isinstance(typ, QAPISchemaBuiltinType): return typ.name + if isinstance(typ, QAPISchemaArrayType): + return '[' + self._use_type(typ.element_type) + ']' return self._name(typ.name) def _gen_json(self, name, mtype, obj): - if mtype != 'command' and mtype != 'event' and mtype != 'builtin': + if mtype not in ('command', 'event', 'builtin', 'array'): name = self._name(name) obj['name'] = name obj['meta-type'] = mtype @@ -136,8 +138,8 @@ const char %(c_name)s[] = %(c_string)s; self._gen_json(name, 'enum', {'values': values}) def visit_array_type(self, name, info, element_type): - self._gen_json(name, 'array', - {'element-type': self._use_type(element_type)}) + element = self._use_type(element_type) + self._gen_json('[' + element + ']', 'array', {'element-type': element}) def visit_object_type_flat(self, name, info, members, variants): obj = {'members': [self._gen_member(m) for m in members]} diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py index 4fe618ef3c..b37900f6fc 100644 --- a/scripts/qapi-types.py +++ b/scripts/qapi-types.py @@ -36,26 +36,37 @@ struct %(c_name)s { c_name=c_name(name), c_type=element_type.c_type()) -def gen_struct_field(name, typ, optional): +def gen_struct_field(member): ret = '' - if optional: + if member.optional: ret += mcgen(''' bool has_%(c_name)s; ''', - c_name=c_name(name)) + c_name=c_name(member.name)) ret += mcgen(''' %(c_type)s %(c_name)s; ''', - c_type=typ.c_type(), c_name=c_name(name)) + c_type=member.type.c_type(), c_name=c_name(member.name)) return ret -def gen_struct_fields(members): +def gen_struct_fields(local_members, base=None): ret = '' - for memb in members: - ret += gen_struct_field(memb.name, memb.type, memb.optional) + if base: + ret += mcgen(''' + /* Members inherited from %(c_name)s: */ +''', + c_name=base.c_name()) + for memb in base.members: + ret += gen_struct_field(memb) + ret += mcgen(''' + /* Own members: */ +''') + + for memb in local_members: + ret += gen_struct_field(memb) return ret @@ -66,16 +77,13 @@ struct %(c_name)s { ''', c_name=c_name(name)) - if base: - ret += gen_struct_field('base', base, False) - - ret += gen_struct_fields(members) + ret += gen_struct_fields(members, base) # Make sure that all structs have at least one field; this avoids # potential issues with attempting to malloc space for zero-length # structs in C, and also incompatibility with C++ (where an empty # struct is size 1). - if not base and not members: + if not (base and base.members) and not members: ret += mcgen(''' char qapi_dummy_field_for_empty_struct; ''') @@ -87,6 +95,19 @@ struct %(c_name)s { return ret +def gen_upcast(name, base): + # C makes const-correctness ugly. We have to cast away const to let + # this function work for both const and non-const obj. + return mcgen(''' + +static inline %(base)s *qapi_%(c_name)s_base(const %(c_name)s *obj) +{ + return (%(base)s *)obj; +} +''', + c_name=c_name(name), base=base.c_name()) + + def gen_alternate_qtypes_decl(name): return mcgen(''' @@ -126,19 +147,9 @@ struct %(c_name)s { ''', c_name=c_name(name)) if base: - ret += mcgen(''' - /* Members inherited from %(c_name)s: */ -''', - c_name=c_name(base.name)) - ret += gen_struct_fields(base.members) - ret += mcgen(''' - /* Own members: */ -''') + ret += gen_struct_fields([], base) else: - ret += mcgen(''' - %(c_type)s kind; -''', - c_type=c_name(variants.tag_member.type.name)) + ret += gen_struct_field(variants.tag_member) # FIXME: What purpose does data serve, besides preventing a union that # has a branch named 'data'? We use it in qapi-visit.py to decide @@ -152,10 +163,7 @@ struct %(c_name)s { union { /* union tag is @%(c_name)s */ void *data; ''', - # TODO ugly special case for simple union - # Use same tag name in C as on the wire to get rid of - # it, then: c_name=c_name(variants.tag_member.name) - c_name=c_name(variants.tag_name or 'kind')) + c_name=c_name(variants.tag_member.name)) for var in variants.variants: # Ugly special case for simple union TODO get rid of it @@ -167,7 +175,7 @@ struct %(c_name)s { c_name=c_name(var.name)) ret += mcgen(''' - }; + } u; }; ''') @@ -265,6 +273,8 @@ class QAPISchemaGenTypeVisitor(QAPISchemaVisitor): self.decl += gen_union(name, base, variants) else: self.decl += gen_struct(name, base, members) + if base: + self.decl += gen_upcast(name, base) self._gen_type_cleanup(name) def visit_alternate_type(self, name, info, variants): diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py index d0759d739a..3ef5c16a66 100644 --- a/scripts/qapi-visit.py +++ b/scripts/qapi-visit.py @@ -15,7 +15,12 @@ from qapi import * import re +# visit_type_FOO_implicit() is emitted as needed; track if it has already +# been output. implicit_structs_seen = set() + +# visit_type_FOO_fields() is always emitted; track if a forward declaration +# or implementation has already been output. struct_fields_seen = set() @@ -29,19 +34,24 @@ void visit_type_%(c_name)s(Visitor *v, %(c_type)sobj, const char *name, Error ** c_name=c_name(name), c_type=c_type) -def gen_visit_implicit_struct(typ): - if typ in implicit_structs_seen: - return '' - implicit_structs_seen.add(typ) - +def gen_visit_fields_decl(typ): ret = '' if typ.name not in struct_fields_seen: - # Need a forward declaration ret += mcgen(''' static void visit_type_%(c_type)s_fields(Visitor *v, %(c_type)s **obj, Error **errp); ''', c_type=typ.c_name()) + struct_fields_seen.add(typ.name) + return ret + + +def gen_visit_implicit_struct(typ): + if typ in implicit_structs_seen: + return '' + implicit_structs_seen.add(typ) + + ret = gen_visit_fields_decl(typ) ret += mcgen(''' @@ -62,13 +72,12 @@ static void visit_type_implicit_%(c_type)s(Visitor *v, %(c_type)s **obj, Error * def gen_visit_struct_fields(name, base, members): - struct_fields_seen.add(name) - ret = '' if base: - ret += gen_visit_implicit_struct(base) + ret += gen_visit_fields_decl(base) + struct_fields_seen.add(name) ret += mcgen(''' static void visit_type_%(c_name)s_fields(Visitor *v, %(c_name)s **obj, Error **errp) @@ -80,14 +89,15 @@ static void visit_type_%(c_name)s_fields(Visitor *v, %(c_name)s **obj, Error **e if base: ret += mcgen(''' - visit_type_implicit_%(c_type)s(v, &(*obj)->%(c_name)s, &err); + visit_type_%(c_type)s_fields(v, (%(c_type)s **)obj, &err); ''', - c_type=base.c_name(), c_name=c_name('base')) + c_type=base.c_name()) ret += gen_err_check() ret += gen_visit_fields(members, prefix='(*obj)->') - if re.search('^ *goto out;', ret, re.MULTILINE): + # 'goto out' produced for base, and by gen_visit_fields() for each member + if base or members: ret += mcgen(''' out: @@ -128,6 +138,10 @@ void visit_type_%(c_name)s(Visitor *v, %(c_name)s **obj, const char *name, Error def gen_visit_list(name, element_type): + # FIXME: if *obj is NULL on entry, and the first visit_next_list() + # assigns to *obj, while a later one fails, we should clean up *obj + # rather than leaving it non-NULL. As currently written, the caller must + # call qapi_free_FOOList() to avoid a memory leak of the partial FOOList. return mcgen(''' void visit_type_%(c_name)s(Visitor *v, %(c_name)s **obj, const char *name, Error **errp) @@ -179,18 +193,18 @@ void visit_type_%(c_name)s(Visitor *v, %(c_name)s **obj, const char *name, Error if (err) { goto out; } - visit_get_next_type(v, (int*) &(*obj)->kind, %(c_name)s_qtypes, name, &err); + visit_get_next_type(v, (int*) &(*obj)->type, %(c_name)s_qtypes, name, &err); if (err) { goto out_obj; } - switch ((*obj)->kind) { + switch ((*obj)->type) { ''', c_name=c_name(name)) for var in variants.variants: ret += mcgen(''' case %(case)s: - visit_type_%(c_type)s(v, &(*obj)->%(c_name)s, name, &err); + visit_type_%(c_type)s(v, &(*obj)->u.%(c_name)s, name, &err); break; ''', case=c_enum_const(variants.tag_member.type.name, @@ -218,8 +232,7 @@ def gen_visit_union(name, base, variants): ret = '' if base: - members = [m for m in base.members if m != variants.tag_member] - ret += gen_visit_struct_fields(name, None, members) + ret += gen_visit_fields_decl(base) for var in variants.variants: # Ugly special case for simple union TODO get rid of it @@ -244,31 +257,24 @@ void visit_type_%(c_name)s(Visitor *v, %(c_name)s **obj, const char *name, Error if base: ret += mcgen(''' - visit_type_%(c_name)s_fields(v, obj, &err); + visit_type_%(c_name)s_fields(v, (%(c_name)s **)obj, &err); ''', - c_name=c_name(name)) - ret += gen_err_check(label='out_obj') - - tag_key = variants.tag_member.name - if not variants.tag_name: - # we pointlessly use a different key for simple unions - tag_key = 'type' - ret += mcgen(''' + c_name=base.c_name()) + else: + ret += mcgen(''' visit_type_%(c_type)s(v, &(*obj)->%(c_name)s, "%(name)s", &err); - if (err) { - goto out_obj; - } - if (!visit_start_union(v, !!(*obj)->data, &err) || err) { +''', + c_type=variants.tag_member.type.c_name(), + c_name=c_name(variants.tag_member.name), + name=variants.tag_member.name) + ret += gen_err_check(label='out_obj') + ret += mcgen(''' + if (!visit_start_union(v, !!(*obj)->u.data, &err) || err) { goto out_obj; } switch ((*obj)->%(c_name)s) { ''', - c_type=variants.tag_member.type.c_name(), - # TODO ugly special case for simple union - # Use same tag name in C as on the wire to get rid of - # it, then: c_name=c_name(variants.tag_member.name) - c_name=c_name(variants.tag_name or 'kind'), - name=tag_key) + c_name=c_name(variants.tag_member.name)) for var in variants.variants: # TODO ugly special case for simple union @@ -280,13 +286,13 @@ void visit_type_%(c_name)s(Visitor *v, %(c_name)s **obj, const char *name, Error var.name)) if simple_union_type: ret += mcgen(''' - visit_type_%(c_type)s(v, &(*obj)->%(c_name)s, "data", &err); + visit_type_%(c_type)s(v, &(*obj)->u.%(c_name)s, "data", &err); ''', c_type=simple_union_type.c_name(), c_name=c_name(var.name)) else: ret += mcgen(''' - visit_type_implicit_%(c_type)s(v, &(*obj)->%(c_name)s, &err); + visit_type_implicit_%(c_type)s(v, &(*obj)->u.%(c_name)s, &err); ''', c_type=var.type.c_name(), c_name=c_name(var.name)) @@ -302,7 +308,7 @@ out_obj: error_propagate(errp, err); err = NULL; if (*obj) { - visit_end_union(v, !!(*obj)->data, &err); + visit_end_union(v, !!(*obj)->u.data, &err); } error_propagate(errp, err); err = NULL; diff --git a/scripts/qapi.py b/scripts/qapi.py index 9d53255320..7c50cc4c87 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -172,7 +172,7 @@ class QAPISchemaParser(object): if self.tok == '#': self.cursor = self.src.find('\n', self.cursor) - elif self.tok in ['{', '}', ':', ',', '[', ']']: + elif self.tok in "{}:,[]": return elif self.tok == "'": string = '' @@ -376,7 +376,9 @@ def check_name(expr_info, source, name, allow_optional=False, # code always prefixes it with the enum name if enum_member: membername = '_' + membername - if not valid_name.match(membername): + # Reserve the entire 'q_' namespace for c_name() + if not valid_name.match(membername) or \ + c_name(membername, False).startswith('q_'): raise QAPIExprError(expr_info, "%s uses invalid name '%s'" % (source, name)) @@ -390,10 +392,10 @@ def add_name(name, info, meta, implicit=False): raise QAPIExprError(info, "%s '%s' is already defined" % (all_names[name], name)) - if not implicit and name[-4:] == 'Kind': + if not implicit and (name.endswith('Kind') or name.endswith('List')): raise QAPIExprError(info, - "%s '%s' should not end in 'Kind'" - % (meta, name)) + "%s '%s' should not end in '%s'" + % (meta, name, name[-4:])) all_names[name] = meta @@ -488,6 +490,10 @@ def check_type(expr_info, source, value, allow_array=False, for (key, arg) in value.items(): check_name(expr_info, "Member of %s" % source, key, allow_optional=allow_optional) + if c_name(key, False) == 'u' or c_name(key, False).startswith('has_'): + raise QAPIExprError(expr_info, + "Member of %s uses reserved name '%s'" + % (source, key)) # Todo: allow dictionaries to represent default values of # an optional argument. check_type(expr_info, "Member '%s' of %s" % (key, source), arg, @@ -542,7 +548,8 @@ def check_union(expr, expr_info): base = expr.get('base') discriminator = expr.get('discriminator') members = expr['data'] - values = {'MAX': '(automatic)', 'KIND': '(automatic)'} + values = {'MAX': '(automatic)', 'KIND': '(automatic)', + 'TYPE': '(automatic)'} # Two types of unions, determined by discriminator. @@ -910,7 +917,7 @@ class QAPISchemaEnumType(QAPISchemaType): def is_implicit(self): # See QAPISchema._make_implicit_enum_type() - return self.name[-4:] == 'Kind' + return self.name.endswith('Kind') def c_type(self, is_param=False): return c_name(self.name) @@ -1196,9 +1203,7 @@ class QAPISchema(object): return name def _make_array_type(self, element_type, info): - # TODO fooList namespace is not reserved; user can create collisions, - # or abuse our type system with ['fooList'] for 2D array - name = element_type + 'List' + name = element_type + 'List' # Use namespace reserved by add_name() if not self.lookup_type(name): self._def_entity(QAPISchemaArrayType(name, info, element_type)) return name diff --git a/scripts/qemu-gdb.py b/scripts/qemu-gdb.py index d6f2e5a903..b3f8e04f77 100644 --- a/scripts/qemu-gdb.py +++ b/scripts/qemu-gdb.py @@ -26,7 +26,7 @@ import os, sys sys.path.append(os.path.dirname(__file__)) -from qemugdb import mtree, coroutine +from qemugdb import aio, mtree, coroutine class QemuCommand(gdb.Command): '''Prefix for QEMU debug support commands''' @@ -37,6 +37,10 @@ class QemuCommand(gdb.Command): QemuCommand() coroutine.CoroutineCommand() mtree.MtreeCommand() +aio.HandlersCommand() + +coroutine.CoroutineSPFunction() +coroutine.CoroutinePCFunction() # Default to silently passing through SIGUSR1, because QEMU sends it # to itself a lot. diff --git a/scripts/qemugdb/aio.py b/scripts/qemugdb/aio.py new file mode 100644 index 0000000000..2ba00c4444 --- /dev/null +++ b/scripts/qemugdb/aio.py @@ -0,0 +1,58 @@ +#!/usr/bin/python + +# GDB debugging support: aio/iohandler debug +# +# Copyright (c) 2015 Red Hat, Inc. +# +# Author: Dr. David Alan Gilbert <dgilbert@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. +# + +import gdb +from qemugdb import coroutine + +def isnull(ptr): + return ptr == gdb.Value(0).cast(ptr.type) + +def dump_aiocontext(context, verbose): + '''Display a dump and backtrace for an aiocontext''' + cur = context['aio_handlers']['lh_first'] + # Get pointers to functions we're going to process specially + sym_fd_coroutine_enter = gdb.parse_and_eval('fd_coroutine_enter') + + while not isnull(cur): + entry = cur.dereference() + gdb.write('----\n%s\n' % entry) + if verbose and cur['io_read'] == sym_fd_coroutine_enter: + coptr = (cur['opaque'].cast(gdb.lookup_type('FDYieldUntilData').pointer()))['co'] + coptr = coptr.cast(gdb.lookup_type('CoroutineUContext').pointer()) + coroutine.bt_jmpbuf(coptr['env']['__jmpbuf']) + cur = cur['node']['le_next']; + + gdb.write('----\n') + +class HandlersCommand(gdb.Command): + '''Display aio handlers''' + def __init__(self): + gdb.Command.__init__(self, 'qemu handlers', gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + + def invoke(self, arg, from_tty): + verbose = False + argv = gdb.string_to_argv(arg) + + if len(argv) > 0 and argv[0] == '--verbose': + verbose = True + argv.pop(0) + + if len(argv) > 1: + gdb.write('usage: qemu handlers [--verbose] [handler]\n') + return + + if len(argv) == 1: + handlers_name = argv[0] + else: + handlers_name = 'qemu_aio_context' + dump_aiocontext(gdb.parse_and_eval(handlers_name), verbose) diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py index 3c54918b5d..ab699794ab 100644 --- a/scripts/qemugdb/coroutine.py +++ b/scripts/qemugdb/coroutine.py @@ -15,8 +15,11 @@ import gdb +VOID_PTR = gdb.lookup_type('void').pointer() + def get_fs_base(): - '''Fetch %fs base value using arch_prctl(ARCH_GET_FS)''' + '''Fetch %fs base value using arch_prctl(ARCH_GET_FS). This is + pthread_self().''' # %rsp - 120 is scratch space according to the SystemV ABI old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)') gdb.execute('call arch_prctl(0x1003, $rsp - 120)', False, True) @@ -24,17 +27,29 @@ def get_fs_base(): gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True) return fs_base +def pthread_self(): + '''Fetch pthread_self() from the glibc start_thread function.''' + f = gdb.newest_frame() + while f.name() != 'start_thread': + f = f.older() + if f is None: + return get_fs_base() + + try: + return f.read_var("arg") + except ValueError: + return get_fs_base() + def get_glibc_pointer_guard(): '''Fetch glibc pointer guard value''' - fs_base = get_fs_base() + fs_base = pthread_self() return gdb.parse_and_eval('*(uint64_t*)((uint64_t)%s + 0x30)' % fs_base) def glibc_ptr_demangle(val, pointer_guard): '''Undo effect of glibc's PTR_MANGLE()''' return gdb.parse_and_eval('(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s' % (val, val, pointer_guard)) -def bt_jmpbuf(jmpbuf): - '''Backtrace a jmpbuf''' +def get_jmpbuf_regs(jmpbuf): JB_RBX = 0 JB_RBP = 1 JB_R12 = 2 @@ -44,35 +59,35 @@ def bt_jmpbuf(jmpbuf): JB_RSP = 6 JB_PC = 7 - old_rbx = gdb.parse_and_eval('(uint64_t)$rbx') - old_rbp = gdb.parse_and_eval('(uint64_t)$rbp') - old_rsp = gdb.parse_and_eval('(uint64_t)$rsp') - old_r12 = gdb.parse_and_eval('(uint64_t)$r12') - old_r13 = gdb.parse_and_eval('(uint64_t)$r13') - old_r14 = gdb.parse_and_eval('(uint64_t)$r14') - old_r15 = gdb.parse_and_eval('(uint64_t)$r15') - old_rip = gdb.parse_and_eval('(uint64_t)$rip') - pointer_guard = get_glibc_pointer_guard() - gdb.execute('set $rbx = %s' % jmpbuf[JB_RBX]) - gdb.execute('set $rbp = %s' % glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard)) - gdb.execute('set $rsp = %s' % glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard)) - gdb.execute('set $r12 = %s' % jmpbuf[JB_R12]) - gdb.execute('set $r13 = %s' % jmpbuf[JB_R13]) - gdb.execute('set $r14 = %s' % jmpbuf[JB_R14]) - gdb.execute('set $r15 = %s' % jmpbuf[JB_R15]) - gdb.execute('set $rip = %s' % glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard)) + return {'rbx': jmpbuf[JB_RBX], + 'rbp': glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard), + 'rsp': glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard), + 'r12': jmpbuf[JB_R12], + 'r13': jmpbuf[JB_R13], + 'r14': jmpbuf[JB_R14], + 'r15': jmpbuf[JB_R15], + 'rip': glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard) } + +def bt_jmpbuf(jmpbuf): + '''Backtrace a jmpbuf''' + regs = get_jmpbuf_regs(jmpbuf) + old = dict() + + for i in regs: + old[i] = gdb.parse_and_eval('(uint64_t)$%s' % i) + + for i in regs: + gdb.execute('set $%s = %s' % (i, regs[i])) gdb.execute('bt') - gdb.execute('set $rbx = %s' % old_rbx) - gdb.execute('set $rbp = %s' % old_rbp) - gdb.execute('set $rsp = %s' % old_rsp) - gdb.execute('set $r12 = %s' % old_r12) - gdb.execute('set $r13 = %s' % old_r13) - gdb.execute('set $r14 = %s' % old_r14) - gdb.execute('set $r15 = %s' % old_r15) - gdb.execute('set $rip = %s' % old_rip) + for i in regs: + gdb.execute('set $%s = %s' % (i, old[i])) + +def coroutine_to_jmpbuf(co): + coroutine_pointer = co.cast(gdb.lookup_type('CoroutineUContext').pointer()) + return coroutine_pointer['env']['__jmpbuf'] class CoroutineCommand(gdb.Command): @@ -87,5 +102,18 @@ class CoroutineCommand(gdb.Command): gdb.write('usage: qemu coroutine <coroutine-pointer>\n') return - coroutine_pointer = gdb.parse_and_eval(argv[0]).cast(gdb.lookup_type('CoroutineUContext').pointer()) - bt_jmpbuf(coroutine_pointer['env']['__jmpbuf']) + bt_jmpbuf(coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0]))) + +class CoroutineSPFunction(gdb.Function): + def __init__(self): + gdb.Function.__init__(self, 'qemu_coroutine_sp') + + def invoke(self, addr): + return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rsp'].cast(VOID_PTR) + +class CoroutinePCFunction(gdb.Function): + def __init__(self): + gdb.Function.__init__(self, 'qemu_coroutine_pc') + + def invoke(self, addr): + return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rip'].cast(VOID_PTR) diff --git a/scripts/texi2pod.pl b/scripts/texi2pod.pl index 94097fb065..8767662d30 100755 --- a/scripts/texi2pod.pl +++ b/scripts/texi2pod.pl @@ -317,7 +317,7 @@ while(<$inf>) { @columns = (); for $column (split (/\s*\@tab\s*/, $1)) { # @strong{...} is used a @headitem work-alike - $column =~ s/^\@strong{(.*)}$/$1/; + $column =~ s/^\@strong\{(.*)\}$/$1/; push @columns, $column; } $_ = "\n=item ".join (" : ", @columns)."\n"; diff --git a/slirp/bootp.c b/slirp/bootp.c index b7db9fa335..1baaab1ab1 100644 --- a/slirp/bootp.c +++ b/slirp/bootp.c @@ -23,6 +23,12 @@ */ #include <slirp.h> +#if defined(_WIN32) +/* Windows ntohl() returns an u_long value. + * Add a type cast to match the format strings. */ +# define ntohl(n) ((uint32_t)ntohl(n)) +#endif + /* XXX: only DHCP is supported */ #define LEASE_TIME (24 * 3600) @@ -155,7 +161,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) dhcp_decode(bp, &dhcp_msg_type, &preq_addr); DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); if (preq_addr.s_addr != htonl(0L)) - DPRINTF(" req_addr=%08x\n", ntohl(preq_addr.s_addr)); + DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); else DPRINTF("\n"); @@ -234,7 +240,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) q += 4; if (bc) { - DPRINTF("%s addr=%08x\n", + DPRINTF("%s addr=%08" PRIx32 "\n", (dhcp_msg_type == DHCPDISCOVER) ? "offered" : "ack'ed", ntohl(daddr.sin_addr.s_addr)); @@ -302,7 +308,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) } else { static const char nak_msg[] = "requested address not available"; - DPRINTF("nak'ed addr=%08x\n", ntohl(preq_addr.s_addr)); + DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); *q++ = RFC2132_MSG_TYPE; *q++ = 1; diff --git a/slirp/if.c b/slirp/if.c index fb7acf87dd..8325a2afb5 100644 --- a/slirp/if.c +++ b/slirp/if.c @@ -53,8 +53,8 @@ if_output(struct socket *so, struct mbuf *ifm) int on_fastq = 1; DEBUG_CALL("if_output"); - DEBUG_ARG("so = %lx", (long)so); - DEBUG_ARG("ifm = %lx", (long)ifm); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("ifm = %p", ifm); /* * First remove the mbuf from m_usedlist, diff --git a/slirp/ip_icmp.c b/slirp/ip_icmp.c index 9f1cb08a18..23b9f0fa45 100644 --- a/slirp/ip_icmp.c +++ b/slirp/ip_icmp.c @@ -125,7 +125,7 @@ icmp_input(struct mbuf *m, int hlen) Slirp *slirp = m->slirp; DEBUG_CALL("icmp_input"); - DEBUG_ARG("m = %lx", (long )m); + DEBUG_ARG("m = %p", m); DEBUG_ARG("m_len = %d", m->m_len); /* @@ -252,7 +252,7 @@ icmp_error(struct mbuf *msrc, u_char type, u_char code, int minsize, register struct mbuf *m; DEBUG_CALL("icmp_error"); - DEBUG_ARG("msrc = %lx", (long )msrc); + DEBUG_ARG("msrc = %p", msrc); DEBUG_ARG("msrc_len = %d", msrc->m_len); if(type!=ICMP_UNREACH && type!=ICMP_TIMXCEED) goto end_error; diff --git a/slirp/ip_input.c b/slirp/ip_input.c index 880bdfd3cc..7d436e6ecc 100644 --- a/slirp/ip_input.c +++ b/slirp/ip_input.c @@ -80,7 +80,7 @@ ip_input(struct mbuf *m) int hlen; DEBUG_CALL("ip_input"); - DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("m = %p", m); DEBUG_ARG("m_len = %d", m->m_len); if (m->m_len < sizeof (struct ip)) { @@ -232,9 +232,9 @@ ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) int i, next; DEBUG_CALL("ip_reass"); - DEBUG_ARG("ip = %lx", (long)ip); - DEBUG_ARG("fp = %lx", (long)fp); - DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("ip = %p", ip); + DEBUG_ARG("fp = %p", fp); + DEBUG_ARG("m = %p", m); /* * Presence of header sizes in mbufs @@ -400,7 +400,7 @@ static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) { DEBUG_CALL("ip_enq"); - DEBUG_ARG("prev = %lx", (long)prev); + DEBUG_ARG("prev = %p", prev); p->ipf_prev = prev; p->ipf_next = prev->ipf_next; ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; diff --git a/slirp/ip_output.c b/slirp/ip_output.c index c82830fe7d..1254d0d585 100644 --- a/slirp/ip_output.c +++ b/slirp/ip_output.c @@ -60,8 +60,8 @@ ip_output(struct socket *so, struct mbuf *m0) int len, off, error = 0; DEBUG_CALL("ip_output"); - DEBUG_ARG("so = %lx", (long)so); - DEBUG_ARG("m0 = %lx", (long)m0); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m0 = %p", m0); ip = mtod(m, struct ip *); /* diff --git a/slirp/mbuf.c b/slirp/mbuf.c index 4fefb043bf..795fc29f98 100644 --- a/slirp/mbuf.c +++ b/slirp/mbuf.c @@ -94,7 +94,7 @@ m_get(Slirp *slirp) m->arp_requested = false; m->expiration_date = (uint64_t)-1; end_error: - DEBUG_ARG("m = %lx", (long )m); + DEBUG_ARG("m = %p", m); return m; } @@ -103,7 +103,7 @@ m_free(struct mbuf *m) { DEBUG_CALL("m_free"); - DEBUG_ARG("m = %lx", (long )m); + DEBUG_ARG("m = %p", m); if(m) { /* Remove from m_usedlist */ @@ -221,7 +221,7 @@ dtom(Slirp *slirp, void *dat) struct mbuf *m; DEBUG_CALL("dtom"); - DEBUG_ARG("dat = %lx", (long )dat); + DEBUG_ARG("dat = %p", dat); /* bug corrected for M_EXT buffers */ for (m = slirp->m_usedlist.m_next; m != &slirp->m_usedlist; diff --git a/slirp/misc.c b/slirp/misc.c index 578e8b2c16..5497161f13 100644 --- a/slirp/misc.c +++ b/slirp/misc.c @@ -123,9 +123,9 @@ fork_exec(struct socket *so, const char *ex, int do_pty) pid_t pid; DEBUG_CALL("fork_exec"); - DEBUG_ARG("so = %lx", (long)so); - DEBUG_ARG("ex = %lx", (long)ex); - DEBUG_ARG("do_pty = %lx", (long)do_pty); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("ex = %p", ex); + DEBUG_ARG("do_pty = %x", do_pty); if (do_pty == 2) { return 0; diff --git a/slirp/sbuf.c b/slirp/sbuf.c index 08ec2b4f44..b8c3db744f 100644 --- a/slirp/sbuf.c +++ b/slirp/sbuf.c @@ -72,8 +72,8 @@ sbappend(struct socket *so, struct mbuf *m) int ret = 0; DEBUG_CALL("sbappend"); - DEBUG_ARG("so = %lx", (long)so); - DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); DEBUG_ARG("m->m_len = %d", m->m_len); /* Shouldn't happen, but... e.g. foreign host closes connection */ diff --git a/slirp/socket.c b/slirp/socket.c index 37ac5cf2fb..1673e3afce 100644 --- a/slirp/socket.c +++ b/slirp/socket.c @@ -91,7 +91,7 @@ size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) int mss = so->so_tcpcb->t_maxseg; DEBUG_CALL("sopreprbuf"); - DEBUG_ARG("so = %lx", (long )so); + DEBUG_ARG("so = %p", so); if (len <= 0) return 0; @@ -155,7 +155,7 @@ soread(struct socket *so) struct iovec iov[2]; DEBUG_CALL("soread"); - DEBUG_ARG("so = %lx", (long )so); + DEBUG_ARG("so = %p", so); /* * No need to check if there's enough room to read. @@ -215,7 +215,7 @@ int soreadbuf(struct socket *so, const char *buf, int size) struct iovec iov[2]; DEBUG_CALL("soreadbuf"); - DEBUG_ARG("so = %lx", (long )so); + DEBUG_ARG("so = %p", so); /* * No need to check if there's enough room to read. @@ -263,7 +263,7 @@ sorecvoob(struct socket *so) struct tcpcb *tp = sototcpcb(so); DEBUG_CALL("sorecvoob"); - DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("so = %p", so); /* * We take a guess at how much urgent data has arrived. @@ -293,7 +293,7 @@ sosendoob(struct socket *so) int n, len; DEBUG_CALL("sosendoob"); - DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("so = %p", so); DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); if (so->so_urgc > 2048) @@ -351,7 +351,7 @@ sowrite(struct socket *so) struct iovec iov[2]; DEBUG_CALL("sowrite"); - DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("so = %p", so); if (so->so_urgc) { sosendoob(so); @@ -441,7 +441,7 @@ sorecvfrom(struct socket *so) socklen_t addrlen = sizeof(struct sockaddr_in); DEBUG_CALL("sorecvfrom"); - DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("so = %p", so); if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ char buff[256]; @@ -543,8 +543,8 @@ sosendto(struct socket *so, struct mbuf *m) struct sockaddr_in addr; DEBUG_CALL("sosendto"); - DEBUG_ARG("so = %lx", (long)so); - DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); addr.sin_family = AF_INET; if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c index 00a77b4a5f..6b096ecb3c 100644 --- a/slirp/tcp_input.c +++ b/slirp/tcp_input.c @@ -231,8 +231,8 @@ tcp_input(struct mbuf *m, int iphlen, struct socket *inso) Slirp *slirp; DEBUG_CALL("tcp_input"); - DEBUG_ARGS((dfd, " m = %8lx iphlen = %2d inso = %lx\n", - (long )m, iphlen, (long )inso )); + DEBUG_ARGS((dfd, " m = %p iphlen = %2d inso = %p\n", + m, iphlen, inso)); /* * If called with m == 0, then we're continuing the connect @@ -923,8 +923,8 @@ trimthenstep6: if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { - DEBUG_MISC((dfd, " dup ack m = %lx so = %lx\n", - (long )m, (long )so)); + DEBUG_MISC((dfd, " dup ack m = %p so = %p\n", + m, so)); /* * If we have outstanding data (other than * a window probe), this is a completely @@ -1302,7 +1302,7 @@ tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti) int opt, optlen; DEBUG_CALL("tcp_dooptions"); - DEBUG_ARGS((dfd, " tp = %lx cnt=%i\n", (long)tp, cnt)); + DEBUG_ARGS((dfd, " tp = %p cnt=%i\n", tp, cnt)); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[0]; @@ -1383,7 +1383,7 @@ tcp_xmit_timer(register struct tcpcb *tp, int rtt) register short delta; DEBUG_CALL("tcp_xmit_timer"); - DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("tp = %p", tp); DEBUG_ARG("rtt = %d", rtt); if (tp->t_srtt != 0) { @@ -1471,7 +1471,7 @@ tcp_mss(struct tcpcb *tp, u_int offer) int mss; DEBUG_CALL("tcp_mss"); - DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("tp = %p", tp); DEBUG_ARG("offer = %d", offer); mss = min(IF_MTU, IF_MRU) - sizeof(struct tcpiphdr); diff --git a/slirp/tcp_output.c b/slirp/tcp_output.c index 8aa3d90470..fafca58a0a 100644 --- a/slirp/tcp_output.c +++ b/slirp/tcp_output.c @@ -66,7 +66,7 @@ tcp_output(struct tcpcb *tp) int idle, sendalot; DEBUG_CALL("tcp_output"); - DEBUG_ARG("tp = %lx", (long )tp); + DEBUG_ARG("tp = %p", tp); /* * Determine length of data that should be transmitted, diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c index 7571c5a282..e161ed2a96 100644 --- a/slirp/tcp_subr.c +++ b/slirp/tcp_subr.c @@ -224,7 +224,7 @@ tcp_newtcpcb(struct socket *so) struct tcpcb *tcp_drop(struct tcpcb *tp, int err) { DEBUG_CALL("tcp_drop"); - DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("tp = %p", tp); DEBUG_ARG("errno = %d", errno); if (TCPS_HAVERCVDSYN(tp->t_state)) { @@ -249,7 +249,7 @@ tcp_close(struct tcpcb *tp) register struct mbuf *m; DEBUG_CALL("tcp_close"); - DEBUG_ARG("tp = %lx", (long )tp); + DEBUG_ARG("tp = %p", tp); /* free the reassembly queue, if any */ t = tcpfrag_list_first(tp); @@ -290,7 +290,7 @@ tcp_sockclosed(struct tcpcb *tp) { DEBUG_CALL("tcp_sockclosed"); - DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("tp = %p", tp); switch (tp->t_state) { @@ -330,7 +330,7 @@ int tcp_fconnect(struct socket *so) int ret=0; DEBUG_CALL("tcp_fconnect"); - DEBUG_ARG("so = %lx", (long )so); + DEBUG_ARG("so = %p", so); if( (ret = so->s = qemu_socket(AF_INET,SOCK_STREAM,0)) >= 0) { int opt, s=so->s; @@ -393,7 +393,7 @@ void tcp_connect(struct socket *inso) int s, opt; DEBUG_CALL("tcp_connect"); - DEBUG_ARG("inso = %lx", (long)inso); + DEBUG_ARG("inso = %p", inso); /* * If it's an SS_ACCEPTONCE socket, no need to socreate() @@ -564,8 +564,8 @@ tcp_emu(struct socket *so, struct mbuf *m) char *bptr; DEBUG_CALL("tcp_emu"); - DEBUG_ARG("so = %lx", (long)so); - DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); switch(so->so_emu) { int x, i; @@ -900,7 +900,7 @@ int tcp_ctl(struct socket *so) int do_pty; DEBUG_CALL("tcp_ctl"); - DEBUG_ARG("so = %lx", (long )so); + DEBUG_ARG("so = %p", so); if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { /* Check if it's pty_exec */ diff --git a/slirp/udp.c b/slirp/udp.c index f77e00f5a0..fee13b4dbd 100644 --- a/slirp/udp.c +++ b/slirp/udp.c @@ -72,7 +72,7 @@ udp_input(register struct mbuf *m, int iphlen) struct socket *so; DEBUG_CALL("udp_input"); - DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("m = %p", m); DEBUG_ARG("iphlen = %d", iphlen); /* @@ -241,8 +241,8 @@ int udp_output2(struct socket *so, struct mbuf *m, int error = 0; DEBUG_CALL("udp_output"); - DEBUG_ARG("so = %lx", (long)so); - DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); DEBUG_ARG("saddr = %lx", (long)saddr->sin_addr.s_addr); DEBUG_ARG("daddr = %lx", (long)daddr->sin_addr.s_addr); diff --git a/spice-qemu-char.c b/spice-qemu-char.c index a20fb5c90c..e70e0f7366 100644 --- a/spice-qemu-char.c +++ b/spice-qemu-char.c @@ -301,7 +301,7 @@ static CharDriverState *qemu_chr_open_spice_vmc(const char *id, ChardevReturn *ret, Error **errp) { - const char *type = backend->spicevmc->type; + const char *type = backend->u.spicevmc->type; const char **psubtype = spice_server_char_device_recognized_subtypes(); for (; *psubtype != NULL; ++psubtype) { @@ -324,7 +324,7 @@ static CharDriverState *qemu_chr_open_spice_port(const char *id, ChardevReturn *ret, Error **errp) { - const char *name = backend->spiceport->fqdn; + const char *name = backend->u.spiceport->fqdn; CharDriverState *chr; SpiceCharDriver *s; @@ -362,8 +362,8 @@ static void qemu_chr_parse_spice_vmc(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: spice channel: no name given"); return; } - backend->spicevmc = g_new0(ChardevSpiceChannel, 1); - backend->spicevmc->type = g_strdup(name); + backend->u.spicevmc = g_new0(ChardevSpiceChannel, 1); + backend->u.spicevmc->type = g_strdup(name); } static void qemu_chr_parse_spice_port(QemuOpts *opts, ChardevBackend *backend, @@ -375,8 +375,8 @@ static void qemu_chr_parse_spice_port(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: spice port: no name given"); return; } - backend->spiceport = g_new0(ChardevSpicePort, 1); - backend->spiceport->fqdn = g_strdup(name); + backend->u.spiceport = g_new0(ChardevSpicePort, 1); + backend->u.spiceport->fqdn = g_strdup(name); } static void register_types(void) diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 251443b7d6..d7898a0c4a 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -21,6 +21,8 @@ stub-obj-y += mon-printf.o stub-obj-y += monitor-init.o stub-obj-y += notify-event.o stub-obj-y += qtest.o +stub-obj-y += replay.o +stub-obj-y += replay-user.o stub-obj-y += reset.o stub-obj-y += runstate-check.o stub-obj-y += set-fd-handler.o @@ -34,4 +36,5 @@ stub-obj-y += cpus.o stub-obj-y += kvm.o stub-obj-y += qmp_pc_dimm_device_list.o stub-obj-y += target-monitor-defs.o +stub-obj-y += target-get-monitor-def.o stub-obj-y += vhost.o diff --git a/stubs/qtest.c b/stubs/qtest.c index dc17594bb6..4dfde6104d 100644 --- a/stubs/qtest.c +++ b/stubs/qtest.c @@ -12,3 +12,8 @@ /* Needed for qtest_allowed() */ bool qtest_allowed; + +bool qtest_driver(void) +{ + return false; +} diff --git a/stubs/replay-user.c b/stubs/replay-user.c new file mode 100644 index 0000000000..cf330721c8 --- /dev/null +++ b/stubs/replay-user.c @@ -0,0 +1,32 @@ +/* + * replay.c + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "sysemu/replay.h" + +bool replay_exception(void) +{ + return true; +} + +bool replay_has_exception(void) +{ + return false; +} + +bool replay_interrupt(void) +{ + return true; +} + +bool replay_has_interrupt(void) +{ + return false; +} diff --git a/stubs/replay.c b/stubs/replay.c new file mode 100644 index 0000000000..8f98790257 --- /dev/null +++ b/stubs/replay.c @@ -0,0 +1,31 @@ +#include "sysemu/replay.h" +#include <stdlib.h> +#include "sysemu/sysemu.h" + +ReplayMode replay_mode; + +int64_t replay_save_clock(unsigned int kind, int64_t clock) +{ + abort(); + return 0; +} + +int64_t replay_read_clock(unsigned int kind) +{ + abort(); + return 0; +} + +bool replay_checkpoint(ReplayCheckpoint checkpoint) +{ + return true; +} + +bool replay_events_enabled(void) +{ + return false; +} + +void replay_finish(void) +{ +} diff --git a/stubs/target-get-monitor-def.c b/stubs/target-get-monitor-def.c new file mode 100644 index 0000000000..711a9ae46b --- /dev/null +++ b/stubs/target-get-monitor-def.c @@ -0,0 +1,31 @@ +/* + * Stub for target_get_monitor_def. + * + * Copyright IBM Corp., 2015 + * + * Author: Alexey Kardashevskiy <aik@ozlabs.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "stdint.h" + +typedef struct CPUState CPUState; + +int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval); + +int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval) +{ + return -1; +} diff --git a/target-alpha/translate.c b/target-alpha/translate.c index f936d1b5b9..9909c70b1b 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -2916,7 +2916,12 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) num_insns++; if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) { - gen_excp(&ctx, EXCP_DEBUG, 0); + ret = gen_excp(&ctx, EXCP_DEBUG, 0); + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + ctx.pc += 4; break; } if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) { diff --git a/target-arm/helper.c b/target-arm/helper.c index 1966f9c045..afc4163342 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -3130,7 +3130,8 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[1]) }, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) }, /* We rely on the access checks not allowing the guest to write to the * state field when SPSel indicates that it's being used as the stack * pointer. @@ -3299,23 +3300,28 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[6]) }, + .access = PL2_RW, + .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_HYP]) }, { .name = "SPSR_IRQ", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 0, - .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[4]) }, + .access = PL2_RW, + .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_IRQ]) }, { .name = "SPSR_ABT", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 1, - .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[2]) }, + .access = PL2_RW, + .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_ABT]) }, { .name = "SPSR_UND", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 2, - .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[3]) }, + .access = PL2_RW, + .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_UND]) }, { .name = "SPSR_FIQ", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 3, - .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[5]) }, + .access = PL2_RW, + .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_FIQ]) }, { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0, .access = PL2_RW, .writefn = vbar_write, @@ -3552,7 +3558,8 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "SPSR_EL3", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[7]) }, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_MON]) }, { .name = "VBAR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 0, .access = PL3_RW, .writefn = vbar_write, @@ -5183,21 +5190,21 @@ int bank_number(int mode) switch (mode) { case ARM_CPU_MODE_USR: case ARM_CPU_MODE_SYS: - return 0; + return BANK_USRSYS; case ARM_CPU_MODE_SVC: - return 1; + return BANK_SVC; case ARM_CPU_MODE_ABT: - return 2; + return BANK_ABT; case ARM_CPU_MODE_UND: - return 3; + return BANK_UND; case ARM_CPU_MODE_IRQ: - return 4; + return BANK_IRQ; case ARM_CPU_MODE_FIQ: - return 5; + return BANK_FIQ; case ARM_CPU_MODE_HYP: - return 6; + return BANK_HYP; case ARM_CPU_MODE_MON: - return 7; + return BANK_MON; } g_assert_not_reached(); } @@ -6635,6 +6642,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, int ap, ns, xn, pxn; uint32_t el = regime_el(env, mmu_idx); bool ttbr1_valid = true; + uint64_t descaddrmask; /* TODO: * This code does not handle the different format TCR for VTCR_EL2. @@ -6824,6 +6832,15 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, descaddr = extract64(ttbr, 0, 48); descaddr &= ~((1ULL << (inputsize - (stride * (4 - level)))) - 1); + /* The address field in the descriptor goes up to bit 39 for ARMv7 + * but up to bit 47 for ARMv8. + */ + if (arm_feature(env, ARM_FEATURE_V8)) { + descaddrmask = 0xfffffffff000ULL; + } else { + descaddrmask = 0xfffffff000ULL; + } + /* Secure accesses start with the page table in secure memory and * can be downgraded to non-secure at any step. Non-secure accesses * remain non-secure. We implement this by just ORing in the NSTable/NS @@ -6847,7 +6864,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, /* Invalid, or the Reserved level 3 encoding */ goto do_fault; } - descaddr = descriptor & 0xfffffff000ULL; + descaddr = descriptor & descaddrmask; if ((descriptor & 2) && (level < 3)) { /* Table entry. The top five bits are attributes which may diff --git a/target-arm/internals.h b/target-arm/internals.h index 412827bcbf..347998c8be 100644 --- a/target-arm/internals.h +++ b/target-arm/internals.h @@ -25,6 +25,16 @@ #ifndef TARGET_ARM_INTERNALS_H #define TARGET_ARM_INTERNALS_H +/* register banks for CPU modes */ +#define BANK_USRSYS 0 +#define BANK_SVC 1 +#define BANK_ABT 2 +#define BANK_UND 3 +#define BANK_IRQ 4 +#define BANK_FIQ 5 +#define BANK_HYP 6 +#define BANK_MON 7 + static inline bool excp_is_internal(int excp) { /* Return true if this exception number represents a QEMU-internal @@ -91,9 +101,9 @@ static inline void arm_log_exception(int idx) static inline unsigned int aarch64_banked_spsr_index(unsigned int el) { static const unsigned int map[4] = { - [1] = 1, /* EL1. */ - [2] = 6, /* EL2. */ - [3] = 7, /* EL3. */ + [1] = BANK_SVC, /* EL1. */ + [2] = BANK_HYP, /* EL2. */ + [3] = BANK_MON, /* EL3. */ }; assert(el >= 1 && el <= 3); return map[el]; diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c index 3ae57a6d68..df1e2b0ebf 100644 --- a/target-arm/kvm32.c +++ b/target-arm/kvm32.c @@ -280,30 +280,30 @@ static const Reg regs[] = { COREREG(usr_regs.uregs[10], usr_regs[2]), COREREG(usr_regs.uregs[11], usr_regs[3]), COREREG(usr_regs.uregs[12], usr_regs[4]), - COREREG(usr_regs.uregs[13], banked_r13[0]), - COREREG(usr_regs.uregs[14], banked_r14[0]), + COREREG(usr_regs.uregs[13], banked_r13[BANK_USRSYS]), + COREREG(usr_regs.uregs[14], banked_r14[BANK_USRSYS]), /* R13, R14, SPSR for SVC, ABT, UND, IRQ banks */ - COREREG(svc_regs[0], banked_r13[1]), - COREREG(svc_regs[1], banked_r14[1]), - COREREG64(svc_regs[2], banked_spsr[1]), - COREREG(abt_regs[0], banked_r13[2]), - COREREG(abt_regs[1], banked_r14[2]), - COREREG64(abt_regs[2], banked_spsr[2]), - COREREG(und_regs[0], banked_r13[3]), - COREREG(und_regs[1], banked_r14[3]), - COREREG64(und_regs[2], banked_spsr[3]), - COREREG(irq_regs[0], banked_r13[4]), - COREREG(irq_regs[1], banked_r14[4]), - COREREG64(irq_regs[2], banked_spsr[4]), + COREREG(svc_regs[0], banked_r13[BANK_SVC]), + COREREG(svc_regs[1], banked_r14[BANK_SVC]), + COREREG64(svc_regs[2], banked_spsr[BANK_SVC]), + COREREG(abt_regs[0], banked_r13[BANK_ABT]), + COREREG(abt_regs[1], banked_r14[BANK_ABT]), + COREREG64(abt_regs[2], banked_spsr[BANK_ABT]), + COREREG(und_regs[0], banked_r13[BANK_UND]), + COREREG(und_regs[1], banked_r14[BANK_UND]), + COREREG64(und_regs[2], banked_spsr[BANK_UND]), + COREREG(irq_regs[0], banked_r13[BANK_IRQ]), + COREREG(irq_regs[1], banked_r14[BANK_IRQ]), + COREREG64(irq_regs[2], banked_spsr[BANK_IRQ]), /* R8_fiq .. R14_fiq and SPSR_fiq */ COREREG(fiq_regs[0], fiq_regs[0]), COREREG(fiq_regs[1], fiq_regs[1]), COREREG(fiq_regs[2], fiq_regs[2]), COREREG(fiq_regs[3], fiq_regs[3]), COREREG(fiq_regs[4], fiq_regs[4]), - COREREG(fiq_regs[5], banked_r13[5]), - COREREG(fiq_regs[6], banked_r14[5]), - COREREG64(fiq_regs[7], banked_spsr[5]), + COREREG(fiq_regs[5], banked_r13[BANK_FIQ]), + COREREG(fiq_regs[6], banked_r14[BANK_FIQ]), + COREREG64(fiq_regs[7], banked_spsr[BANK_FIQ]), /* R15 */ COREREG(usr_regs.uregs[15], regs[15]), /* VFP system registers */ diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index a4c4ebf9cd..6cd54c8f7a 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -392,9 +392,9 @@ uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno) uint32_t val; if (regno == 13) { - val = env->banked_r13[0]; + val = env->banked_r13[BANK_USRSYS]; } else if (regno == 14) { - val = env->banked_r14[0]; + val = env->banked_r14[BANK_USRSYS]; } else if (regno >= 8 && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) { val = env->usr_regs[regno - 8]; @@ -407,9 +407,9 @@ uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno) void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val) { if (regno == 13) { - env->banked_r13[0] = val; + env->banked_r13[BANK_USRSYS] = val; } else if (regno == 14) { - env->banked_r14[0] = val; + env->banked_r14[BANK_USRSYS] = val; } else if (regno >= 8 && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) { env->usr_regs[regno - 8] = val; @@ -917,7 +917,13 @@ void arm_debug_excp_handler(CPUState *cs) uint64_t pc = is_a64(env) ? env->pc : env->regs[15]; bool same_el = (arm_debug_target_el(env) == arm_current_el(env)); - if (cpu_breakpoint_test(cs, pc, BP_GDB)) { + /* (1) GDB breakpoints should be handled first. + * (2) Do not raise a CPU exception if no CPU breakpoint has fired, + * since singlestep is also done by generating a debug internal + * exception. + */ + if (cpu_breakpoint_test(cs, pc, BP_GDB) + || !cpu_breakpoint_test(cs, pc, BP_CPU)) { return; } diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 19f9d8d2c8..14e8131b05 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -126,6 +126,8 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f, CPUARMState *env = &cpu->env; uint32_t psr = pstate_read(env); int i; + int el = arm_current_el(env); + const char *ns_status; cpu_fprintf(f, "PC=%016"PRIx64" SP=%016"PRIx64"\n", env->pc, env->xregs[31]); @@ -137,13 +139,22 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f, cpu_fprintf(f, " "); } } - cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n", + + if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) { + ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S "; + } else { + ns_status = ""; + } + + cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n", psr, psr & PSTATE_N ? 'N' : '-', psr & PSTATE_Z ? 'Z' : '-', psr & PSTATE_C ? 'C' : '-', - psr & PSTATE_V ? 'V' : '-'); - cpu_fprintf(f, "\n"); + psr & PSTATE_V ? 'V' : '-', + ns_status, + el, + psr & PSTATE_SP ? 'h' : 't'); if (flags & CPU_DUMP_FPU) { int numvfpregs = 32; @@ -1805,9 +1816,6 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, * o2: 0 -> exclusive, 1 -> not * o1: 0 -> single register, 1 -> register pair * o0: 1 -> load-acquire/store-release, 0 -> not - * - * o0 == 0 AND o2 == 1 is un-allocated - * o1 == 1 is un-allocated except for 32 and 64 bit sizes */ static void disas_ldst_excl(DisasContext *s, uint32_t insn) { @@ -1822,7 +1830,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) int size = extract32(insn, 30, 2); TCGv_i64 tcg_addr; - if ((!is_excl && !is_lasr) || + if ((!is_excl && !is_pair && !is_lasr) || + (!is_excl && is_pair) || (is_pair && size < 2)) { unallocated_encoding(s); return; @@ -1851,15 +1860,6 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } else { do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false); } - if (is_pair) { - TCGv_i64 tcg_rt2 = cpu_reg(s, rt); - tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size); - if (is_store) { - do_gpr_st(s, tcg_rt2, tcg_addr, size); - } else { - do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false); - } - } } } @@ -11091,13 +11091,17 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { if (bp->pc == dc->pc) { if (bp->flags & BP_CPU) { + gen_a64_set_pc_im(dc->pc); gen_helper_check_breakpoints(cpu_env); /* End the TB early; it likely won't be executed */ dc->is_jmp = DISAS_UPDATE; } else { gen_exception_internal_insn(dc, 0, EXCP_DEBUG); - /* Advance PC so that clearing the breakpoint will - invalidate this TB. */ + /* The address covered by the breakpoint must be + included in [tb->pc, tb->pc + tb->size) in order + to for it to be properly cleared -- thus we + increment the PC here so that the logic setting + tb->size below does the right thing. */ dc->pc += 4; goto done_generating; } diff --git a/target-arm/translate.c b/target-arm/translate.c index 6be2c728f0..5d22879755 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -870,7 +870,7 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr) { TCGv_i32 tmp; - s->is_jmp = DISAS_UPDATE; + s->is_jmp = DISAS_JUMP; if (s->thumb != (addr & 1)) { tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, addr & 1); @@ -883,7 +883,7 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr) /* Set PC and Thumb state from var. var is marked as dead. */ static inline void gen_bx(DisasContext *s, TCGv_i32 var) { - s->is_jmp = DISAS_UPDATE; + s->is_jmp = DISAS_JUMP; tcg_gen_andi_i32(cpu_R[15], var, ~1); tcg_gen_andi_i32(var, var, 1); store_cpu_field(var, thumb); @@ -1062,7 +1062,7 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp, static inline void gen_lookup_tb(DisasContext *s) { tcg_gen_movi_i32(cpu_R[15], s->pc & ~1); - s->is_jmp = DISAS_UPDATE; + s->is_jmp = DISAS_JUMP; } static inline void gen_add_data_offset(DisasContext *s, unsigned int insn, @@ -4096,7 +4096,7 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc) tmp = load_cpu_field(spsr); gen_set_cpsr(tmp, CPSR_ERET_MASK); tcg_temp_free_i32(tmp); - s->is_jmp = DISAS_UPDATE; + s->is_jmp = DISAS_JUMP; } /* Generate a v6 exception return. Marks both values as dead. */ @@ -4105,7 +4105,7 @@ static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr) gen_set_cpsr(cpsr, CPSR_ERET_MASK); tcg_temp_free_i32(cpsr); store_reg(s, 15, pc); - s->is_jmp = DISAS_UPDATE; + s->is_jmp = DISAS_JUMP; } static void gen_nop_hint(DisasContext *s, int val) @@ -7210,6 +7210,7 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) break; } + gen_set_condexec(s); gen_set_pc_im(s, s->pc - 4); tmpptr = tcg_const_ptr(ri); tcg_syn = tcg_const_i32(syndrome); @@ -9035,7 +9036,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) tmp = load_cpu_field(spsr); gen_set_cpsr(tmp, CPSR_ERET_MASK); tcg_temp_free_i32(tmp); - s->is_jmp = DISAS_UPDATE; + s->is_jmp = DISAS_JUMP; } } break; @@ -11355,7 +11356,7 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) /* We always get here via a jump, so know we are not in a conditional execution block. */ gen_exception_internal(EXCP_KERNEL_TRAP); - dc->is_jmp = DISAS_UPDATE; + dc->is_jmp = DISAS_EXC; break; } #else @@ -11363,7 +11364,7 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) /* We always get here via a jump, so know we are not in a conditional execution block. */ gen_exception_internal(EXCP_EXCEPTION_EXIT); - dc->is_jmp = DISAS_UPDATE; + dc->is_jmp = DISAS_EXC; break; } #endif @@ -11373,13 +11374,18 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { if (bp->pc == dc->pc) { if (bp->flags & BP_CPU) { + gen_set_condexec(dc); + gen_set_pc_im(dc, dc->pc); gen_helper_check_breakpoints(cpu_env); /* End the TB early; it's likely not going to be executed */ dc->is_jmp = DISAS_UPDATE; } else { gen_exception_internal_insn(dc, 0, EXCP_DEBUG); - /* Advance PC so that clearing the breakpoint will - invalidate this TB. */ + /* The address covered by the breakpoint must be + included in [tb->pc, tb->pc + tb->size) in order + to for it to be properly cleared -- thus we + increment the PC here so that the logic setting + tb->size below does the right thing. */ /* TODO: Advance PC by correct instruction length to * avoid disassembler error messages */ dc->pc += 2; @@ -11494,7 +11500,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) } gen_set_label(dc->condlabel); } - if (dc->condjmp || !dc->is_jmp) { + if (dc->condjmp || dc->is_jmp == DISAS_NEXT || + dc->is_jmp == DISAS_UPDATE) { gen_set_pc_im(dc, dc->pc); dc->condjmp = 0; } @@ -11530,9 +11537,11 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) case DISAS_NEXT: gen_goto_tb(dc, 1, dc->pc); break; - default: - case DISAS_JUMP: case DISAS_UPDATE: + gen_set_pc_im(dc, dc->pc); + /* fall through */ + case DISAS_JUMP: + default: /* indicate that the hash table must be used to find the next TB */ tcg_gen_exit_tb(0); break; @@ -11599,6 +11608,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, CPUARMState *env = &cpu->env; int i; uint32_t psr; + const char *ns_status; if (is_a64(env)) { aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags); @@ -11613,13 +11623,22 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, cpu_fprintf(f, " "); } psr = cpsr_read(env); - cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n", + + if (arm_feature(env, ARM_FEATURE_EL3) && + (psr & CPSR_M) != ARM_CPU_MODE_MON) { + ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S "; + } else { + ns_status = ""; + } + + cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n", psr, psr & (1 << 31) ? 'N' : '-', psr & (1 << 30) ? 'Z' : '-', psr & (1 << 29) ? 'C' : '-', psr & (1 << 28) ? 'V' : '-', psr & CPSR_T ? 'T' : 'A', + ns_status, cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26); if (flags & CPU_DUMP_FPU) { diff --git a/target-cris/translate.c b/target-cris/translate.c index 964845c461..2d710cc108 100644 --- a/target-cris/translate.c +++ b/target-cris/translate.c @@ -3166,6 +3166,11 @@ void gen_intermediate_code(CPUCRISState *env, struct TranslationBlock *tb) tcg_gen_movi_tl(env_pc, dc->pc); t_gen_raise_exception(EXCP_DEBUG); dc->is_jmp = DISAS_UPDATE; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc->pc += 2; break; } diff --git a/target-i386/cpu.c b/target-i386/cpu.c index c1a9e09743..11e5e39a75 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -259,8 +259,8 @@ static const char *svm_feature_name[] = { static const char *cpuid_7_0_ebx_feature_name[] = { "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep", "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, - "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, - NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL, + "avx512f", NULL, "rdseed", "adx", "smap", NULL, "pcommit", "clflushopt", + "clwb", NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL, }; static const char *cpuid_apm_edx_feature_name[] = { @@ -345,7 +345,9 @@ static const char *cpuid_6_feature_name[] = { #define TCG_SVM_FEATURES 0 #define TCG_KVM_FEATURES 0 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \ - CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX) + CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX | \ + CPUID_7_0_EBX_PCOMMIT | CPUID_7_0_EBX_CLFLUSHOPT | \ + CPUID_7_0_EBX_CLWB) /* missing: CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, @@ -671,12 +673,11 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | CPUID_PSE36, .features[FEAT_1_ECX] = - CPUID_EXT_SSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT, + CPUID_EXT_SSE3 | CPUID_EXT_CX16, .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | - CPUID_EXT3_ABM | CPUID_EXT3_SSE4A, + CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, }, { @@ -772,7 +773,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_1_EDX] = PPRO_FEATURES, .features[FEAT_1_ECX] = - CPUID_EXT_SSE3 | CPUID_EXT_POPCNT, + CPUID_EXT_SSE3, .xlevel = 0x80000004, }, { @@ -1243,8 +1244,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_DE | CPUID_FP87, .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, + /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_FXSR | + CPUID_EXT2_LM | CPUID_EXT2_FXSR | CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | @@ -1272,8 +1274,9 @@ static X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_1_ECX] = CPUID_EXT_POPCNT | CPUID_EXT_CX16 | CPUID_EXT_MONITOR | CPUID_EXT_SSE3, + /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_FXSR | + CPUID_EXT2_LM | CPUID_EXT2_FXSR | CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | @@ -1304,8 +1307,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, + /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | + CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE | @@ -1339,8 +1343,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, + /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | + CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE | @@ -2243,7 +2248,7 @@ void x86_cpudef_setup(void) pstrcpy(def->model_id, sizeof(def->model_id), "QEMU Virtual CPU version "); pstrcat(def->model_id, sizeof(def->model_id), - qemu_get_version()); + qemu_hw_version()); break; } } @@ -3141,7 +3146,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-reset", X86CPU, hyperv_reset, false), DEFINE_PROP_BOOL("hv-vpindex", X86CPU, hyperv_vpindex, false), DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false), - DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false), + DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0), diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 62f78798b6..84edfd0d8a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -286,6 +286,8 @@ #define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P) #define MCE_BANKS_DEF 10 +#define MCG_CAP_BANKS_MASK 0xff + #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ @@ -576,6 +578,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_RDSEED (1U << 18) #define CPUID_7_0_EBX_ADX (1U << 19) #define CPUID_7_0_EBX_SMAP (1U << 20) +#define CPUID_7_0_EBX_PCOMMIT (1U << 22) /* Persistent Commit */ +#define CPUID_7_0_EBX_CLFLUSHOPT (1U << 23) /* Flush a Cache Line Optimized */ +#define CPUID_7_0_EBX_CLWB (1U << 24) /* Cache Line Write Back */ #define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */ #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 64046cb69d..6dc9846398 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -111,6 +111,51 @@ bool kvm_allows_irq0_override(void) return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); } +static int kvm_get_tsc(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[1]; + } msr_data; + int ret; + + if (env->tsc_valid) { + return 0; + } + + msr_data.info.nmsrs = 1; + msr_data.entries[0].index = MSR_IA32_TSC; + env->tsc_valid = !runstate_is_running(); + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + + env->tsc = msr_data.entries[0].data; + return 0; +} + +static inline void do_kvm_synchronize_tsc(void *arg) +{ + CPUState *cpu = arg; + + kvm_get_tsc(cpu); +} + +void kvm_synchronize_all_tsc(void) +{ + CPUState *cpu; + + if (kvm_enabled()) { + CPU_FOREACH(cpu) { + run_on_cpu(cpu, do_kvm_synchronize_tsc, cpu); + } + } +} + static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) { struct kvm_cpuid2 *cpuid; @@ -729,7 +774,7 @@ int kvm_arch_init_vcpu(CPUState *cs) && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA) && kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) { - uint64_t mcg_cap; + uint64_t mcg_cap, unsupported_caps; int banks; int ret; @@ -739,18 +784,24 @@ int kvm_arch_init_vcpu(CPUState *cs) return ret; } - if (banks > MCE_BANKS_DEF) { - banks = MCE_BANKS_DEF; + if (banks < (env->mcg_cap & MCG_CAP_BANKS_MASK)) { + error_report("kvm: Unsupported MCE bank count (QEMU = %d, KVM = %d)", + (int)(env->mcg_cap & MCG_CAP_BANKS_MASK), banks); + return -ENOTSUP; } - mcg_cap &= MCE_CAP_DEF; - mcg_cap |= banks; - ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &mcg_cap); + + unsupported_caps = env->mcg_cap & ~(mcg_cap | MCG_CAP_BANKS_MASK); + if (unsupported_caps) { + error_report("warning: Unsupported MCG_CAP bits: 0x%" PRIx64, + unsupported_caps); + } + + env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK; + ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap); if (ret < 0) { fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret)); return ret; } - - env->mcg_cap = mcg_cap; } qemu_add_vm_change_state_handler(cpu_update_state, env); diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h index e557e94f44..c1b312ba2a 100644 --- a/target-i386/kvm_i386.h +++ b/target-i386/kvm_i386.h @@ -15,6 +15,7 @@ bool kvm_allows_irq0_override(void); bool kvm_has_smm(void); +void kvm_synchronize_all_tsc(void); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_do_init_vcpu(X86CPU *cs); diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 7aa693aee9..1780d1d791 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -2037,10 +2037,10 @@ static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, } break; case 3: - for (j = valids - validd; j >= 0; j--) { + for (j = valids; j >= 0; j--) { res <<= 1; v = 1; - for (i = MIN(upper - j, validd); i >= 0; i--) { + for (i = MIN(valids - j, validd); i >= 0; i--) { v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i)); } res |= v; diff --git a/target-i386/translate.c b/target-i386/translate.c index 764b1e44b7..a3dd167a9b 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -2432,7 +2432,7 @@ static void gen_pusha(DisasContext *s) { int i; gen_op_movl_A0_reg(R_ESP); - gen_op_addl_A0_im(-8 << s->dflag); + gen_op_addl_A0_im(-(8 << s->dflag)); if (!s->ss32) tcg_gen_ext16u_tl(cpu_A0, cpu_A0); tcg_gen_mov_tl(cpu_T[1], cpu_A0); @@ -3848,8 +3848,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; #ifdef TARGET_X86_64 case MO_64: - tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg], + tcg_gen_mulu2_i64(cpu_T[0], cpu_T[1], cpu_T[0], cpu_regs[R_EDX]); + tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T[0]); + tcg_gen_mov_i64(cpu_regs[reg], cpu_T[1]); break; #endif } @@ -7716,20 +7718,43 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 5: /* lfence */ - case 6: /* mfence */ if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2)) goto illegal_op; break; - case 7: /* sfence / clflush */ - if ((modrm & 0xc7) == 0xc0) { - /* sfence */ - /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */ - if (!(s->cpuid_features & CPUID_SSE)) + case 6: /* mfence/clwb */ + if (s->prefix & PREFIX_DATA) { + /* clwb */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) goto illegal_op; + gen_nop_modrm(env, s, modrm); } else { - /* clflush */ - if (!(s->cpuid_features & CPUID_CLFLUSH)) + /* mfence */ + if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2)) goto illegal_op; + } + break; + case 7: /* sfence / clflush / clflushopt / pcommit */ + if ((modrm & 0xc7) == 0xc0) { + if (s->prefix & PREFIX_DATA) { + /* pcommit */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)) + goto illegal_op; + } else { + /* sfence */ + /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */ + if (!(s->cpuid_features & CPUID_SSE)) + goto illegal_op; + } + } else { + if (s->prefix & PREFIX_DATA) { + /* clflushopt */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) + goto illegal_op; + } else { + /* clflush */ + if (!(s->cpuid_features & CPUID_CLFLUSH)) + goto illegal_op; + } gen_lea_modrm(env, s, modrm); } break; @@ -7962,6 +7987,11 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY))) { gen_debug(dc, pc_ptr - dc->cs_base); + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + pc_ptr += 1; goto done_generating; } if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) { diff --git a/target-lm32/translate.c b/target-lm32/translate.c index c61ad0f9ab..fa5b0b93a3 100644 --- a/target-lm32/translate.c +++ b/target-lm32/translate.c @@ -1078,6 +1078,11 @@ void gen_intermediate_code(CPULM32State *env, struct TranslationBlock *tb) tcg_gen_movi_tl(cpu_pc, dc->pc); t_gen_raise_exception(dc, EXCP_DEBUG); dc->is_jmp = DISAS_UPDATE; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc->pc += 4; break; } diff --git a/target-m68k/translate.c b/target-m68k/translate.c index 5995ccea92..41ae2c6059 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -3004,6 +3004,11 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb) if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) { gen_exception(dc, dc->pc, EXCP_DEBUG); dc->is_jmp = DISAS_JUMP; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc->pc += 2; break; } diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c index a9c501099c..154b9d6836 100644 --- a/target-microblaze/translate.c +++ b/target-microblaze/translate.c @@ -1693,6 +1693,11 @@ void gen_intermediate_code(CPUMBState *env, struct TranslationBlock *tb) if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) { t_gen_raise_exception(dc, EXCP_DEBUG); dc->is_jmp = DISAS_UPDATE; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc->pc += 4; break; } diff --git a/target-mips/cpu.c b/target-mips/cpu.c index 37880d20e0..639a24b362 100644 --- a/target-mips/cpu.c +++ b/target-mips/cpu.c @@ -53,12 +53,15 @@ static bool mips_cpu_has_work(CPUState *cs) CPUMIPSState *env = &cpu->env; bool has_work = false; - /* It is implementation dependent if non-enabled interrupts - wake-up the CPU, however most of the implementations only + /* Prior to MIPS Release 6 it is implementation dependent if non-enabled + interrupts wake-up the CPU, however most of the implementations only check for interrupts that can be taken. */ if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && cpu_mips_hw_interrupts_pending(env)) { - has_work = true; + if (cpu_mips_hw_interrupts_enabled(env) || + (env->insn_flags & ISA_MIPS32R6)) { + has_work = true; + } } /* MIPS-MT has the ability to halt the CPU. */ diff --git a/target-mips/cpu.h b/target-mips/cpu.h index f32a0fd737..89c01f7a38 100644 --- a/target-mips/cpu.h +++ b/target-mips/cpu.h @@ -469,6 +469,7 @@ struct CPUMIPSState { #define CP0C5_CV 29 #define CP0C5_EVA 28 #define CP0C5_MSAEn 27 +#define CP0C5_XNP 13 #define CP0C5_UFE 9 #define CP0C5_FRE 8 #define CP0C5_SBRI 6 @@ -637,23 +638,24 @@ static inline int cpu_mmu_index (CPUMIPSState *env, bool ifetch) return env->hflags & MIPS_HFLAG_KSU; } -static inline int cpu_mips_hw_interrupts_pending(CPUMIPSState *env) +static inline bool cpu_mips_hw_interrupts_enabled(CPUMIPSState *env) { - int32_t pending; - int32_t status; - int r; - - if (!(env->CP0_Status & (1 << CP0St_IE)) || - (env->CP0_Status & (1 << CP0St_EXL)) || - (env->CP0_Status & (1 << CP0St_ERL)) || + return (env->CP0_Status & (1 << CP0St_IE)) && + !(env->CP0_Status & (1 << CP0St_EXL)) && + !(env->CP0_Status & (1 << CP0St_ERL)) && + !(env->hflags & MIPS_HFLAG_DM) && /* Note that the TCStatus IXMT field is initialized to zero, and only MT capable cores can set it to one. So we don't need to check for MT capabilities here. */ - (env->active_tc.CP0_TCStatus & (1 << CP0TCSt_IXMT)) || - (env->hflags & MIPS_HFLAG_DM)) { - /* Interrupts are disabled */ - return 0; - } + !(env->active_tc.CP0_TCStatus & (1 << CP0TCSt_IXMT)); +} + +/* Check if there is pending and not masked out interrupt */ +static inline bool cpu_mips_hw_interrupts_pending(CPUMIPSState *env) +{ + int32_t pending; + int32_t status; + bool r; pending = env->CP0_Cause & CP0Ca_IP_mask; status = env->CP0_Status & CP0Ca_IP_mask; @@ -667,7 +669,7 @@ static inline int cpu_mips_hw_interrupts_pending(CPUMIPSState *env) /* A MIPS configured with compatibility or VInt (Vectored Interrupts) treats the pending lines as individual interrupt lines, the status lines are individual masks. */ - r = pending & status; + r = (pending & status) != 0; } return r; } @@ -959,6 +961,15 @@ static inline void compute_hflags(CPUMIPSState *env) } #ifndef CONFIG_USER_ONLY +static inline void cpu_mips_tlb_flush(CPUMIPSState *env, int flush_global) +{ + MIPSCPU *cpu = mips_env_get_cpu(env); + + /* Flush qemu's TLB and discard all shadowed entries. */ + tlb_flush(CPU(cpu), flush_global); + env->tlb->tlb_in_use = env->tlb->nb_tlb; +} + /* Called for updates to CP0_Status. */ static inline void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc) { @@ -997,17 +1008,29 @@ static inline void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc) static inline void cpu_mips_store_status(CPUMIPSState *env, target_ulong val) { uint32_t mask = env->CP0_Status_rw_bitmask; + target_ulong old = env->CP0_Status; if (env->insn_flags & ISA_MIPS32R6) { bool has_supervisor = extract32(mask, CP0St_KSU, 2) == 0x3; - +#if defined(TARGET_MIPS64) + uint32_t ksux = (1 << CP0St_KX) & val; + ksux |= (ksux >> 1) & val; /* KX = 0 forces SX to be 0 */ + ksux |= (ksux >> 1) & val; /* SX = 0 forces UX to be 0 */ + val = (val & ~(7 << CP0St_UX)) | ksux; +#endif if (has_supervisor && extract32(val, CP0St_KSU, 2) == 0x3) { mask &= ~(3 << CP0St_KSU); } mask &= ~(((1 << CP0St_SR) | (1 << CP0St_NMI)) & val); } - env->CP0_Status = (env->CP0_Status & ~mask) | (val & mask); + env->CP0_Status = (old & ~mask) | (val & mask); +#if defined(TARGET_MIPS64) + if ((env->CP0_Status ^ old) & (old & (7 << CP0St_UX))) { + /* Access to at least one of the 64-bit segments has been disabled */ + cpu_mips_tlb_flush(env, 1); + } +#endif if (env->CP0_Config3 & (1 << CP0C3_MT)) { sync_c0_status(env, env, env->current_tc); } else { diff --git a/target-mips/helper.c b/target-mips/helper.c index 01c4461573..118072a9e7 100644 --- a/target-mips/helper.c +++ b/target-mips/helper.c @@ -293,9 +293,10 @@ static void raise_mmu_exception(CPUMIPSState *env, target_ulong address, (env->CP0_EntryHi & 0xFF) | (address & (TARGET_PAGE_MASK << 1)); #if defined(TARGET_MIPS64) env->CP0_EntryHi &= env->SEGMask; - env->CP0_XContext = (env->CP0_XContext & ((~0ULL) << (env->SEGBITS - 7))) | - ((address & 0xC00000000000ULL) >> (55 - env->SEGBITS)) | - ((address & ((1ULL << env->SEGBITS) - 1) & 0xFFFFFFFFFFFFE000ULL) >> 9); + env->CP0_XContext = + /* PTEBase */ (env->CP0_XContext & ((~0ULL) << (env->SEGBITS - 7))) | + /* R */ (extract64(address, 62, 2) << (env->SEGBITS - 9)) | + /* BadVPN2 */ (extract64(address, 13, env->SEGBITS - 13) << 4); #endif cs->exception_index = exception; env->error_code = error_code; @@ -523,6 +524,10 @@ void mips_cpu_do_interrupt(CPUState *cs) enter_debug_mode: if (env->insn_flags & ISA_MIPS3) { env->hflags |= MIPS_HFLAG_64; + if (!(env->insn_flags & ISA_MIPS64R6) || + env->CP0_Status & (1 << CP0St_KX)) { + env->hflags &= ~MIPS_HFLAG_AWRAP; + } } env->hflags |= MIPS_HFLAG_DM | MIPS_HFLAG_CP0; env->hflags &= ~(MIPS_HFLAG_KSU); @@ -547,6 +552,10 @@ void mips_cpu_do_interrupt(CPUState *cs) env->CP0_Status |= (1 << CP0St_ERL) | (1 << CP0St_BEV); if (env->insn_flags & ISA_MIPS3) { env->hflags |= MIPS_HFLAG_64; + if (!(env->insn_flags & ISA_MIPS64R6) || + env->CP0_Status & (1 << CP0St_KX)) { + env->hflags &= ~MIPS_HFLAG_AWRAP; + } } env->hflags |= MIPS_HFLAG_CP0; env->hflags &= ~(MIPS_HFLAG_KSU); @@ -724,6 +733,10 @@ void mips_cpu_do_interrupt(CPUState *cs) env->CP0_Status |= (1 << CP0St_EXL); if (env->insn_flags & ISA_MIPS3) { env->hflags |= MIPS_HFLAG_64; + if (!(env->insn_flags & ISA_MIPS64R6) || + env->CP0_Status & (1 << CP0St_KX)) { + env->hflags &= ~MIPS_HFLAG_AWRAP; + } } env->hflags |= MIPS_HFLAG_CP0; env->hflags &= ~(MIPS_HFLAG_KSU); @@ -759,7 +772,8 @@ bool mips_cpu_exec_interrupt(CPUState *cs, int interrupt_request) MIPSCPU *cpu = MIPS_CPU(cs); CPUMIPSState *env = &cpu->env; - if (cpu_mips_hw_interrupts_pending(env)) { + if (cpu_mips_hw_interrupts_enabled(env) && + cpu_mips_hw_interrupts_pending(env)) { /* Raise it */ cs->exception_index = EXCP_EXT_INTERRUPT; env->error_code = 0; diff --git a/target-mips/helper.h b/target-mips/helper.h index d8cc766bdf..95b9149d89 100644 --- a/target-mips/helper.h +++ b/target-mips/helper.h @@ -358,6 +358,8 @@ DEF_HELPER_1(rdhwr_cpunum, tl, env) DEF_HELPER_1(rdhwr_synci_step, tl, env) DEF_HELPER_1(rdhwr_cc, tl, env) DEF_HELPER_1(rdhwr_ccres, tl, env) +DEF_HELPER_1(rdhwr_performance, tl, env) +DEF_HELPER_1(rdhwr_xnp, tl, env) DEF_HELPER_2(pmon, void, env, int) DEF_HELPER_1(wait, void, env) diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c index 6739fff216..d2c98c9688 100644 --- a/target-mips/op_helper.c +++ b/target-mips/op_helper.c @@ -23,10 +23,6 @@ #include "exec/cpu_ldst.h" #include "sysemu/kvm.h" -#ifndef CONFIG_USER_ONLY -static inline void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global); -#endif - /*****************************************************************************/ /* Exceptions processing helpers */ @@ -1357,6 +1353,13 @@ void helper_mtc0_hwrena(CPUMIPSState *env, target_ulong arg1) { uint32_t mask = 0x0000000F; + if ((env->CP0_Config1 & (1 << CP0C1_PC)) && + (env->insn_flags & ISA_MIPS32R6)) { + mask |= (1 << 4); + } + if (env->insn_flags & ISA_MIPS32R6) { + mask |= (1 << 5); + } if (env->CP0_Config3 & (1 << CP0C3_ULRI)) { mask |= (1 << 29); @@ -1839,15 +1842,6 @@ target_ulong helper_yield(CPUMIPSState *env, target_ulong arg) #ifndef CONFIG_USER_ONLY /* TLB management */ -static void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global) -{ - MIPSCPU *cpu = mips_env_get_cpu(env); - - /* Flush qemu's TLB and discard all shadowed entries. */ - tlb_flush(CPU(cpu), flush_global); - env->tlb->tlb_in_use = env->tlb->nb_tlb; -} - static void r4k_mips_tlb_flush_extra (CPUMIPSState *env, int first) { /* Discard entries from env->tlb[first] onwards. */ @@ -2185,53 +2179,52 @@ void helper_deret(CPUMIPSState *env) } #endif /* !CONFIG_USER_ONLY */ -target_ulong helper_rdhwr_cpunum(CPUMIPSState *env) +static inline void check_hwrena(CPUMIPSState *env, int reg) { - if ((env->hflags & MIPS_HFLAG_CP0) || - (env->CP0_HWREna & (1 << 0))) - return env->CP0_EBase & 0x3ff; - else - do_raise_exception(env, EXCP_RI, GETPC()); + if ((env->hflags & MIPS_HFLAG_CP0) || (env->CP0_HWREna & (1 << reg))) { + return; + } + do_raise_exception(env, EXCP_RI, GETPC()); +} - return 0; +target_ulong helper_rdhwr_cpunum(CPUMIPSState *env) +{ + check_hwrena(env, 0); + return env->CP0_EBase & 0x3ff; } target_ulong helper_rdhwr_synci_step(CPUMIPSState *env) { - if ((env->hflags & MIPS_HFLAG_CP0) || - (env->CP0_HWREna & (1 << 1))) - return env->SYNCI_Step; - else - do_raise_exception(env, EXCP_RI, GETPC()); - - return 0; + check_hwrena(env, 1); + return env->SYNCI_Step; } target_ulong helper_rdhwr_cc(CPUMIPSState *env) { - if ((env->hflags & MIPS_HFLAG_CP0) || - (env->CP0_HWREna & (1 << 2))) { + check_hwrena(env, 2); #ifdef CONFIG_USER_ONLY - return env->CP0_Count; + return env->CP0_Count; #else - return (int32_t)cpu_mips_get_count(env); + return (int32_t)cpu_mips_get_count(env); #endif - } else { - do_raise_exception(env, EXCP_RI, GETPC()); - } - - return 0; } target_ulong helper_rdhwr_ccres(CPUMIPSState *env) { - if ((env->hflags & MIPS_HFLAG_CP0) || - (env->CP0_HWREna & (1 << 3))) - return env->CCRes; - else - do_raise_exception(env, EXCP_RI, GETPC()); + check_hwrena(env, 3); + return env->CCRes; +} - return 0; +target_ulong helper_rdhwr_performance(CPUMIPSState *env) +{ + check_hwrena(env, 4); + return env->CP0_Performance0; +} + +target_ulong helper_rdhwr_xnp(CPUMIPSState *env) +{ + check_hwrena(env, 5); + return (env->CP0_Config5 >> CP0C5_XNP) & 1; } void helper_pmon(CPUMIPSState *env, int function) diff --git a/target-mips/translate.c b/target-mips/translate.c index 897839ced9..56266471c1 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -323,6 +323,7 @@ enum { OPC_TLTIU = (0x0B << 16) | OPC_REGIMM, OPC_TEQI = (0x0C << 16) | OPC_REGIMM, OPC_TNEI = (0x0E << 16) | OPC_REGIMM, + OPC_SIGRIE = (0x17 << 16) | OPC_REGIMM, OPC_SYNCI = (0x1F << 16) | OPC_REGIMM, OPC_DAHI = (0x06 << 16) | OPC_REGIMM, @@ -10333,7 +10334,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc, } } -static void gen_rdhwr(DisasContext *ctx, int rt, int rd) +static void gen_rdhwr(DisasContext *ctx, int rt, int rd, int sel) { TCGv t0; @@ -10361,6 +10362,22 @@ static void gen_rdhwr(DisasContext *ctx, int rt, int rd) gen_helper_rdhwr_ccres(t0, cpu_env); gen_store_gpr(t0, rt); break; + case 4: + check_insn(ctx, ISA_MIPS32R6); + if (sel != 0) { + /* Performance counter registers are not implemented other than + * control register 0. + */ + generate_exception(ctx, EXCP_RI); + } + gen_helper_rdhwr_performance(t0, cpu_env); + gen_store_gpr(t0, rt); + break; + case 5: + check_insn(ctx, ISA_MIPS32R6); + gen_helper_rdhwr_xnp(t0, cpu_env); + gen_store_gpr(t0, rt); + break; case 29: #if defined(CONFIG_USER_ONLY) tcg_gen_ld_tl(t0, cpu_env, @@ -11979,6 +11996,7 @@ enum { ROTR = 0x3, SELEQZ = 0x5, SELNEZ = 0x6, + R6_RDHWR = 0x7, SLLV = 0x0, SRLV = 0x1, @@ -12009,11 +12027,13 @@ enum { MODU = 0x7, /* The following can be distinguished by their lower 6 bits. */ + BREAK32 = 0x07, INS = 0x0c, LSA = 0x0f, ALIGN = 0x1f, EXT = 0x2c, - POOL32AXF = 0x3c + POOL32AXF = 0x3c, + SIGRIE = 0x3f }; /* POOL32AXF encoding of minor opcode field extension */ @@ -12931,7 +12951,8 @@ static void gen_pool32axf (CPUMIPSState *env, DisasContext *ctx, int rt, int rs) gen_cl(ctx, mips32_op, rt, rs); break; case RDHWR: - gen_rdhwr(ctx, rt, rs); + check_insn_opc_removed(ctx, ISA_MIPS32R6); + gen_rdhwr(ctx, rt, rs, 0); break; case WSBH: gen_bshfl(ctx, OPC_WSBH, rs, rt); @@ -13486,6 +13507,10 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) check_insn(ctx, ISA_MIPS32R6); gen_cond_move(ctx, OPC_SELNEZ, rd, rs, rt); break; + case R6_RDHWR: + check_insn(ctx, ISA_MIPS32R6); + gen_rdhwr(ctx, rt, rs, extract32(ctx->opcode, 11, 3)); + break; default: goto pool32a_invalid; } @@ -13629,9 +13654,13 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) case POOL32AXF: gen_pool32axf(env, ctx, rt, rs); break; - case 0x07: + case BREAK32: generate_exception_end(ctx, EXCP_BREAK); break; + case SIGRIE: + check_insn(ctx, ISA_MIPS32R6); + generate_exception_end(ctx, EXCP_RI); + break; default: pool32a_invalid: MIPS_INVAL("pool32a"); @@ -17732,7 +17761,7 @@ static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx) break; #endif case OPC_RDHWR: - gen_rdhwr(ctx, rt, rd); + gen_rdhwr(ctx, rt, rd, extract32(ctx->opcode, 6, 3)); break; case OPC_FORK: check_insn(ctx, ASE_MT); @@ -18950,6 +18979,10 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) check_insn_opc_removed(ctx, ISA_MIPS32R6); gen_trap(ctx, op1, rs, -1, imm); break; + case OPC_SIGRIE: + check_insn(ctx, ISA_MIPS32R6); + generate_exception_end(ctx, EXCP_RI); + break; case OPC_SYNCI: check_insn(ctx, ISA_MIPS32R2); /* Break the TB to be able to sync copied instructions @@ -19594,8 +19627,10 @@ void gen_intermediate_code(CPUMIPSState *env, struct TranslationBlock *tb) save_cpu_state(&ctx, 1); ctx.bstate = BS_BRANCH; gen_helper_raise_exception_debug(cpu_env); - /* Include the breakpoint location or the tb won't - * be flushed when it must be. */ + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ ctx.pc += 4; goto done_generating; } diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c index 1b45884e9b..bb33c7cfeb 100644 --- a/target-mips/translate_init.c +++ b/target-mips/translate_init.c @@ -447,7 +447,7 @@ static const mips_def_t mips_defs[] = (1 << CP0C3_RXI) | (1U << CP0C3_M), .CP0_Config4 = MIPS_CONFIG4 | (0xfc << CP0C4_KScrExist) | (3 << CP0C4_IE) | (1U << CP0C4_M), - .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_LLB), + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_LLB), .CP0_Config5_rw_bitmask = (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) | (1 << CP0C5_UFE), .CP0_LLAddr_rw_bitmask = 0, @@ -665,7 +665,7 @@ static const mips_def_t mips_defs[] = (1 << CP0C3_RXI) | (1 << CP0C3_LPA), .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) | (0xfc << CP0C4_KScrExist), - .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_LLB), + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_LLB), .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) | (1 << CP0C5_UFE), .CP0_LLAddr_rw_bitmask = 0, diff --git a/target-moxie/translate.c b/target-moxie/translate.c index f84841efe2..6dedcb7a21 100644 --- a/target-moxie/translate.c +++ b/target-moxie/translate.c @@ -848,6 +848,11 @@ void gen_intermediate_code(CPUMoxieState *env, struct TranslationBlock *tb) tcg_gen_movi_i32(cpu_pc, ctx.pc); gen_helper_debug(cpu_env); ctx.bstate = BS_EXCP; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + ctx.pc += 2; goto done_generating; } diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c index b66fde18fe..606490a47b 100644 --- a/target-openrisc/translate.c +++ b/target-openrisc/translate.c @@ -1665,6 +1665,11 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb) tcg_gen_movi_tl(cpu_pc, dc->pc); gen_exception(dc, EXCP_DEBUG); dc->is_jmp = DISAS_UPDATE; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc->pc += 4; break; } diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h index 6967a8028b..bc20504b3d 100644 --- a/target-ppc/cpu-qom.h +++ b/target-ppc/cpu-qom.h @@ -118,6 +118,8 @@ void ppc_cpu_dump_state(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf, int flags); void ppc_cpu_dump_statistics(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf, int flags); +int ppc_cpu_get_monitor_def(CPUState *cs, const char *name, + uint64_t *pval); hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int ppc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); int ppc_cpu_gdb_read_register_apple(CPUState *cpu, uint8_t *buf, int reg); diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index b34aed6a19..9706000f8b 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -122,9 +122,15 @@ enum powerpc_mmu_t { /* Architecture 2.06 variant */ POWERPC_MMU_2_06 = POWERPC_MMU_64 | POWERPC_MMU_1TSEG | POWERPC_MMU_AMR | 0x00000003, + /* Architecture 2.06 "degraded" (no 1T segments) */ + POWERPC_MMU_2_06a = POWERPC_MMU_64 | POWERPC_MMU_AMR + | 0x00000003, /* Architecture 2.07 variant */ POWERPC_MMU_2_07 = POWERPC_MMU_64 | POWERPC_MMU_1TSEG | POWERPC_MMU_AMR | 0x00000004, + /* Architecture 2.07 "degraded" (no 1T segments) */ + POWERPC_MMU_2_07a = POWERPC_MMU_64 | POWERPC_MMU_AMR + | 0x00000004, #endif /* defined(TARGET_PPC64) */ }; @@ -678,6 +684,27 @@ enum { #define fpscr_eex (((env->fpscr) >> FPSCR_XX) & ((env->fpscr) >> FPSCR_XE) & \ 0x1F) +#define FP_FX (1ull << FPSCR_FX) +#define FP_FEX (1ull << FPSCR_FEX) +#define FP_OX (1ull << FPSCR_OX) +#define FP_OE (1ull << FPSCR_OE) +#define FP_UX (1ull << FPSCR_UX) +#define FP_UE (1ull << FPSCR_UE) +#define FP_XX (1ull << FPSCR_XX) +#define FP_XE (1ull << FPSCR_XE) +#define FP_ZX (1ull << FPSCR_ZX) +#define FP_ZE (1ull << FPSCR_ZE) +#define FP_VX (1ull << FPSCR_VX) +#define FP_VXSNAN (1ull << FPSCR_VXSNAN) +#define FP_VXISI (1ull << FPSCR_VXISI) +#define FP_VXIMZ (1ull << FPSCR_VXIMZ) +#define FP_VXZDZ (1ull << FPSCR_VXZDZ) +#define FP_VXIDI (1ull << FPSCR_VXIDI) +#define FP_VXVC (1ull << FPSCR_VXVC) +#define FP_VXCVI (1ull << FPSCR_VXCVI) +#define FP_VE (1ull << FPSCR_VE) +#define FP_FI (1ull << FPSCR_FI) + /*****************************************************************************/ /* Vector status and control register */ #define VSCR_NJ 16 /* Vector non-java */ diff --git a/target-ppc/dfp_helper.c b/target-ppc/dfp_helper.c index 49820bf21d..451e4340fd 100644 --- a/target-ppc/dfp_helper.c +++ b/target-ppc/dfp_helper.c @@ -170,27 +170,6 @@ static void dfp_prepare_decimal128(struct PPC_DFP *dfp, uint64_t *a, } } -#define FP_FX (1ull << FPSCR_FX) -#define FP_FEX (1ull << FPSCR_FEX) -#define FP_OX (1ull << FPSCR_OX) -#define FP_OE (1ull << FPSCR_OE) -#define FP_UX (1ull << FPSCR_UX) -#define FP_UE (1ull << FPSCR_UE) -#define FP_XX (1ull << FPSCR_XX) -#define FP_XE (1ull << FPSCR_XE) -#define FP_ZX (1ull << FPSCR_ZX) -#define FP_ZE (1ull << FPSCR_ZE) -#define FP_VX (1ull << FPSCR_VX) -#define FP_VXSNAN (1ull << FPSCR_VXSNAN) -#define FP_VXISI (1ull << FPSCR_VXISI) -#define FP_VXIMZ (1ull << FPSCR_VXIMZ) -#define FP_VXZDZ (1ull << FPSCR_VXZDZ) -#define FP_VXIDI (1ull << FPSCR_VXIDI) -#define FP_VXVC (1ull << FPSCR_VXVC) -#define FP_VXCVI (1ull << FPSCR_VXCVI) -#define FP_VE (1ull << FPSCR_VE) -#define FP_FI (1ull << FPSCR_FI) - static void dfp_set_FPSCR_flag(struct PPC_DFP *dfp, uint64_t flag, uint64_t enabled) { diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c index 6cceffc556..9f2d53d747 100644 --- a/target-ppc/fpu_helper.c +++ b/target-ppc/fpu_helper.c @@ -194,7 +194,7 @@ static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op, /* Update the floating-point invalid operation summary */ env->fpscr |= 1 << FPSCR_VX; /* Update the floating-point exception summary */ - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (ve != 0) { /* Update the floating-point enabled exception summary */ env->fpscr |= 1 << FPSCR_FEX; @@ -211,7 +211,7 @@ static inline void float_zero_divide_excp(CPUPPCState *env) env->fpscr |= 1 << FPSCR_ZX; env->fpscr &= ~((1 << FPSCR_FR) | (1 << FPSCR_FI)); /* Update the floating-point exception summary */ - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_ze != 0) { /* Update the floating-point enabled exception summary */ env->fpscr |= 1 << FPSCR_FEX; @@ -228,7 +228,7 @@ static inline void float_overflow_excp(CPUPPCState *env) env->fpscr |= 1 << FPSCR_OX; /* Update the floating-point exception summary */ - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_oe != 0) { /* XXX: should adjust the result */ /* Update the floating-point enabled exception summary */ @@ -248,7 +248,7 @@ static inline void float_underflow_excp(CPUPPCState *env) env->fpscr |= 1 << FPSCR_UX; /* Update the floating-point exception summary */ - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_ue != 0) { /* XXX: should adjust the result */ /* Update the floating-point enabled exception summary */ @@ -265,7 +265,7 @@ static inline void float_inexact_excp(CPUPPCState *env) env->fpscr |= 1 << FPSCR_XX; /* Update the floating-point exception summary */ - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_xe != 0) { /* Update the floating-point enabled exception summary */ env->fpscr |= 1 << FPSCR_FEX; @@ -330,31 +330,31 @@ void helper_fpscr_setbit(CPUPPCState *env, uint32_t bit) if (prev == 0) { switch (bit) { case FPSCR_VX: - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_ve) { goto raise_ve; } break; case FPSCR_OX: - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_oe) { goto raise_oe; } break; case FPSCR_UX: - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_ue) { goto raise_ue; } break; case FPSCR_ZX: - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_ze) { goto raise_ze; } break; case FPSCR_XX: - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_xe) { goto raise_xe; } @@ -369,7 +369,7 @@ void helper_fpscr_setbit(CPUPPCState *env, uint32_t bit) case FPSCR_VXSQRT: case FPSCR_VXCVI: env->fpscr |= 1 << FPSCR_VX; - env->fpscr |= 1 << FPSCR_FX; + env->fpscr |= FP_FX; if (fpscr_ve != 0) { goto raise_ve; } diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index 309cbe0df1..5e1333d995 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -180,7 +180,7 @@ static inline int kvmppc_remove_spapr_tce(void *table, int pfd, static inline int kvmppc_reset_htab(int shift_hint) { - return -1; + return 0; } static inline uint64_t kvmppc_rma_size(uint64_t current_size, diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c index 6d37dae7b0..7e1f234aa5 100644 --- a/target-ppc/mem_helper.c +++ b/target-ppc/mem_helper.c @@ -100,8 +100,9 @@ void helper_lswx(CPUPPCState *env, target_ulong addr, uint32_t reg, uint32_t ra, uint32_t rb) { if (likely(xer_bc != 0)) { - if (unlikely((ra != 0 && reg < ra && (reg + xer_bc) > ra) || - (reg < rb && (reg + xer_bc) > rb))) { + int num_used_regs = (xer_bc + 3) / 4; + if (unlikely((ra != 0 && reg < ra && (reg + num_used_regs) > ra) || + (reg < rb && (reg + num_used_regs) > rb))) { helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_LSWX); diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c index e52d0e56c2..30298d8d4a 100644 --- a/target-ppc/mmu_helper.c +++ b/target-ppc/mmu_helper.c @@ -1295,7 +1295,9 @@ void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env) case POWERPC_MMU_64B: case POWERPC_MMU_2_03: case POWERPC_MMU_2_06: + case POWERPC_MMU_2_06a: case POWERPC_MMU_2_07: + case POWERPC_MMU_2_07a: dump_slb(f, cpu_fprintf, env); break; #endif @@ -1435,7 +1437,9 @@ hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) case POWERPC_MMU_64B: case POWERPC_MMU_2_03: case POWERPC_MMU_2_06: + case POWERPC_MMU_2_06a: case POWERPC_MMU_2_07: + case POWERPC_MMU_2_07a: return ppc_hash64_get_phys_page_debug(env, addr); #endif @@ -1939,7 +1943,9 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) case POWERPC_MMU_64B: case POWERPC_MMU_2_03: case POWERPC_MMU_2_06: + case POWERPC_MMU_2_06a: case POWERPC_MMU_2_07: + case POWERPC_MMU_2_07a: #endif /* defined(TARGET_PPC64) */ tlb_flush(CPU(cpu), 1); break; @@ -2013,7 +2019,9 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) case POWERPC_MMU_64B: case POWERPC_MMU_2_03: case POWERPC_MMU_2_06: + case POWERPC_MMU_2_06a: case POWERPC_MMU_2_07: + case POWERPC_MMU_2_07a: /* tlbie invalidate TLBs for all segments */ /* XXX: given the fact that there are too many segments to invalidate, * and we still don't have a tlb_flush_mask(env, n, mask) in QEMU, diff --git a/target-ppc/monitor.c b/target-ppc/monitor.c index 9cb1fe97c9..bc571b87ae 100644 --- a/target-ppc/monitor.c +++ b/target-ppc/monitor.c @@ -39,18 +39,6 @@ static target_long monitor_get_ccr (const struct MonitorDef *md, int val) return u; } -static target_long monitor_get_msr (const struct MonitorDef *md, int val) -{ - CPUArchState *env = mon_get_cpu_env(); - return env->msr; -} - -static target_long monitor_get_xer (const struct MonitorDef *md, int val) -{ - CPUArchState *env = mon_get_cpu_env(); - return env->xer; -} - static target_long monitor_get_decr (const struct MonitorDef *md, int val) { CPUArchState *env = mon_get_cpu_env(); @@ -76,176 +64,19 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) dump_mmu((FILE*)mon, (fprintf_function)monitor_printf, env1); } - const MonitorDef monitor_defs[] = { - /* General purpose registers */ - { "r0", offsetof(CPUPPCState, gpr[0]) }, - { "r1", offsetof(CPUPPCState, gpr[1]) }, - { "r2", offsetof(CPUPPCState, gpr[2]) }, - { "r3", offsetof(CPUPPCState, gpr[3]) }, - { "r4", offsetof(CPUPPCState, gpr[4]) }, - { "r5", offsetof(CPUPPCState, gpr[5]) }, - { "r6", offsetof(CPUPPCState, gpr[6]) }, - { "r7", offsetof(CPUPPCState, gpr[7]) }, - { "r8", offsetof(CPUPPCState, gpr[8]) }, - { "r9", offsetof(CPUPPCState, gpr[9]) }, - { "r10", offsetof(CPUPPCState, gpr[10]) }, - { "r11", offsetof(CPUPPCState, gpr[11]) }, - { "r12", offsetof(CPUPPCState, gpr[12]) }, - { "r13", offsetof(CPUPPCState, gpr[13]) }, - { "r14", offsetof(CPUPPCState, gpr[14]) }, - { "r15", offsetof(CPUPPCState, gpr[15]) }, - { "r16", offsetof(CPUPPCState, gpr[16]) }, - { "r17", offsetof(CPUPPCState, gpr[17]) }, - { "r18", offsetof(CPUPPCState, gpr[18]) }, - { "r19", offsetof(CPUPPCState, gpr[19]) }, - { "r20", offsetof(CPUPPCState, gpr[20]) }, - { "r21", offsetof(CPUPPCState, gpr[21]) }, - { "r22", offsetof(CPUPPCState, gpr[22]) }, - { "r23", offsetof(CPUPPCState, gpr[23]) }, - { "r24", offsetof(CPUPPCState, gpr[24]) }, - { "r25", offsetof(CPUPPCState, gpr[25]) }, - { "r26", offsetof(CPUPPCState, gpr[26]) }, - { "r27", offsetof(CPUPPCState, gpr[27]) }, - { "r28", offsetof(CPUPPCState, gpr[28]) }, - { "r29", offsetof(CPUPPCState, gpr[29]) }, - { "r30", offsetof(CPUPPCState, gpr[30]) }, - { "r31", offsetof(CPUPPCState, gpr[31]) }, - /* Floating point registers */ - { "f0", offsetof(CPUPPCState, fpr[0]) }, - { "f1", offsetof(CPUPPCState, fpr[1]) }, - { "f2", offsetof(CPUPPCState, fpr[2]) }, - { "f3", offsetof(CPUPPCState, fpr[3]) }, - { "f4", offsetof(CPUPPCState, fpr[4]) }, - { "f5", offsetof(CPUPPCState, fpr[5]) }, - { "f6", offsetof(CPUPPCState, fpr[6]) }, - { "f7", offsetof(CPUPPCState, fpr[7]) }, - { "f8", offsetof(CPUPPCState, fpr[8]) }, - { "f9", offsetof(CPUPPCState, fpr[9]) }, - { "f10", offsetof(CPUPPCState, fpr[10]) }, - { "f11", offsetof(CPUPPCState, fpr[11]) }, - { "f12", offsetof(CPUPPCState, fpr[12]) }, - { "f13", offsetof(CPUPPCState, fpr[13]) }, - { "f14", offsetof(CPUPPCState, fpr[14]) }, - { "f15", offsetof(CPUPPCState, fpr[15]) }, - { "f16", offsetof(CPUPPCState, fpr[16]) }, - { "f17", offsetof(CPUPPCState, fpr[17]) }, - { "f18", offsetof(CPUPPCState, fpr[18]) }, - { "f19", offsetof(CPUPPCState, fpr[19]) }, - { "f20", offsetof(CPUPPCState, fpr[20]) }, - { "f21", offsetof(CPUPPCState, fpr[21]) }, - { "f22", offsetof(CPUPPCState, fpr[22]) }, - { "f23", offsetof(CPUPPCState, fpr[23]) }, - { "f24", offsetof(CPUPPCState, fpr[24]) }, - { "f25", offsetof(CPUPPCState, fpr[25]) }, - { "f26", offsetof(CPUPPCState, fpr[26]) }, - { "f27", offsetof(CPUPPCState, fpr[27]) }, - { "f28", offsetof(CPUPPCState, fpr[28]) }, - { "f29", offsetof(CPUPPCState, fpr[29]) }, - { "f30", offsetof(CPUPPCState, fpr[30]) }, - { "f31", offsetof(CPUPPCState, fpr[31]) }, { "fpscr", offsetof(CPUPPCState, fpscr) }, /* Next instruction pointer */ { "nip|pc", offsetof(CPUPPCState, nip) }, { "lr", offsetof(CPUPPCState, lr) }, { "ctr", offsetof(CPUPPCState, ctr) }, { "decr", 0, &monitor_get_decr, }, - { "ccr", 0, &monitor_get_ccr, }, + { "ccr|cr", 0, &monitor_get_ccr, }, /* Machine state register */ - { "msr", 0, &monitor_get_msr, }, - { "xer", 0, &monitor_get_xer, }, + { "xer", offsetof(CPUPPCState, xer) }, + { "msr", offsetof(CPUPPCState, msr) }, { "tbu", 0, &monitor_get_tbu, }, { "tbl", 0, &monitor_get_tbl, }, - /* Segment registers */ - { "sdr1", offsetof(CPUPPCState, spr[SPR_SDR1]) }, - { "sr0", offsetof(CPUPPCState, sr[0]) }, - { "sr1", offsetof(CPUPPCState, sr[1]) }, - { "sr2", offsetof(CPUPPCState, sr[2]) }, - { "sr3", offsetof(CPUPPCState, sr[3]) }, - { "sr4", offsetof(CPUPPCState, sr[4]) }, - { "sr5", offsetof(CPUPPCState, sr[5]) }, - { "sr6", offsetof(CPUPPCState, sr[6]) }, - { "sr7", offsetof(CPUPPCState, sr[7]) }, - { "sr8", offsetof(CPUPPCState, sr[8]) }, - { "sr9", offsetof(CPUPPCState, sr[9]) }, - { "sr10", offsetof(CPUPPCState, sr[10]) }, - { "sr11", offsetof(CPUPPCState, sr[11]) }, - { "sr12", offsetof(CPUPPCState, sr[12]) }, - { "sr13", offsetof(CPUPPCState, sr[13]) }, - { "sr14", offsetof(CPUPPCState, sr[14]) }, - { "sr15", offsetof(CPUPPCState, sr[15]) }, - /* Too lazy to put BATs... */ - { "pvr", offsetof(CPUPPCState, spr[SPR_PVR]) }, - - { "srr0", offsetof(CPUPPCState, spr[SPR_SRR0]) }, - { "srr1", offsetof(CPUPPCState, spr[SPR_SRR1]) }, - { "dar", offsetof(CPUPPCState, spr[SPR_DAR]) }, - { "dsisr", offsetof(CPUPPCState, spr[SPR_DSISR]) }, - { "cfar", offsetof(CPUPPCState, spr[SPR_CFAR]) }, - { "sprg0", offsetof(CPUPPCState, spr[SPR_SPRG0]) }, - { "sprg1", offsetof(CPUPPCState, spr[SPR_SPRG1]) }, - { "sprg2", offsetof(CPUPPCState, spr[SPR_SPRG2]) }, - { "sprg3", offsetof(CPUPPCState, spr[SPR_SPRG3]) }, - { "sprg4", offsetof(CPUPPCState, spr[SPR_SPRG4]) }, - { "sprg5", offsetof(CPUPPCState, spr[SPR_SPRG5]) }, - { "sprg6", offsetof(CPUPPCState, spr[SPR_SPRG6]) }, - { "sprg7", offsetof(CPUPPCState, spr[SPR_SPRG7]) }, - { "pid", offsetof(CPUPPCState, spr[SPR_BOOKE_PID]) }, - { "csrr0", offsetof(CPUPPCState, spr[SPR_BOOKE_CSRR0]) }, - { "csrr1", offsetof(CPUPPCState, spr[SPR_BOOKE_CSRR1]) }, - { "esr", offsetof(CPUPPCState, spr[SPR_BOOKE_ESR]) }, - { "dear", offsetof(CPUPPCState, spr[SPR_BOOKE_DEAR]) }, - { "mcsr", offsetof(CPUPPCState, spr[SPR_BOOKE_MCSR]) }, - { "tsr", offsetof(CPUPPCState, spr[SPR_BOOKE_TSR]) }, - { "tcr", offsetof(CPUPPCState, spr[SPR_BOOKE_TCR]) }, - { "vrsave", offsetof(CPUPPCState, spr[SPR_VRSAVE]) }, - { "pir", offsetof(CPUPPCState, spr[SPR_BOOKE_PIR]) }, - { "mcsrr0", offsetof(CPUPPCState, spr[SPR_BOOKE_MCSRR0]) }, - { "mcsrr1", offsetof(CPUPPCState, spr[SPR_BOOKE_MCSRR1]) }, - { "decar", offsetof(CPUPPCState, spr[SPR_BOOKE_DECAR]) }, - { "ivpr", offsetof(CPUPPCState, spr[SPR_BOOKE_IVPR]) }, - { "epcr", offsetof(CPUPPCState, spr[SPR_BOOKE_EPCR]) }, - { "sprg8", offsetof(CPUPPCState, spr[SPR_BOOKE_SPRG8]) }, - { "ivor0", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR0]) }, - { "ivor1", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR1]) }, - { "ivor2", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR2]) }, - { "ivor3", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR3]) }, - { "ivor4", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR4]) }, - { "ivor5", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR5]) }, - { "ivor6", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR6]) }, - { "ivor7", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR7]) }, - { "ivor8", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR8]) }, - { "ivor9", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR9]) }, - { "ivor10", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR10]) }, - { "ivor11", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR11]) }, - { "ivor12", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR12]) }, - { "ivor13", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR13]) }, - { "ivor14", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR14]) }, - { "ivor15", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR15]) }, - { "ivor32", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR32]) }, - { "ivor33", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR33]) }, - { "ivor34", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR34]) }, - { "ivor35", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR35]) }, - { "ivor36", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR36]) }, - { "ivor37", offsetof(CPUPPCState, spr[SPR_BOOKE_IVOR37]) }, - { "mas0", offsetof(CPUPPCState, spr[SPR_BOOKE_MAS0]) }, - { "mas1", offsetof(CPUPPCState, spr[SPR_BOOKE_MAS1]) }, - { "mas2", offsetof(CPUPPCState, spr[SPR_BOOKE_MAS2]) }, - { "mas3", offsetof(CPUPPCState, spr[SPR_BOOKE_MAS3]) }, - { "mas4", offsetof(CPUPPCState, spr[SPR_BOOKE_MAS4]) }, - { "mas6", offsetof(CPUPPCState, spr[SPR_BOOKE_MAS6]) }, - { "mas7", offsetof(CPUPPCState, spr[SPR_BOOKE_MAS7]) }, - { "mmucfg", offsetof(CPUPPCState, spr[SPR_MMUCFG]) }, - { "tlb0cfg", offsetof(CPUPPCState, spr[SPR_BOOKE_TLB0CFG]) }, - { "tlb1cfg", offsetof(CPUPPCState, spr[SPR_BOOKE_TLB1CFG]) }, - { "epr", offsetof(CPUPPCState, spr[SPR_BOOKE_EPR]) }, - { "eplc", offsetof(CPUPPCState, spr[SPR_BOOKE_EPLC]) }, - { "epsc", offsetof(CPUPPCState, spr[SPR_BOOKE_EPSC]) }, - { "svr", offsetof(CPUPPCState, spr[SPR_E500_SVR]) }, - { "mcar", offsetof(CPUPPCState, spr[SPR_Exxx_MCAR]) }, - { "pid1", offsetof(CPUPPCState, spr[SPR_BOOKE_PID1]) }, - { "pid2", offsetof(CPUPPCState, spr[SPR_BOOKE_PID2]) }, - { "hid0", offsetof(CPUPPCState, spr[SPR_HID0]) }, { NULL }, }; @@ -253,3 +84,63 @@ const MonitorDef *target_monitor_defs(void) { return monitor_defs; } + +static int ppc_cpu_get_reg_num(const char *numstr, int maxnum, int *pregnum) +{ + int regnum; + char *endptr = NULL; + + if (!*numstr) { + return false; + } + + regnum = strtoul(numstr, &endptr, 10); + if (*endptr || (regnum >= maxnum)) { + return false; + } + *pregnum = regnum; + + return true; +} + +int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval) +{ + int i, regnum; + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + /* General purpose registers */ + if ((tolower(name[0]) == 'r') && + ppc_cpu_get_reg_num(name + 1, ARRAY_SIZE(env->gpr), ®num)) { + *pval = env->gpr[regnum]; + return 0; + } + + /* Floating point registers */ + if ((tolower(name[0]) == 'f') && + ppc_cpu_get_reg_num(name + 1, ARRAY_SIZE(env->fpr), ®num)) { + *pval = env->fpr[regnum]; + return 0; + } + + /* Special purpose registers */ + for (i = 0; i < ARRAY_SIZE(env->spr_cb); ++i) { + ppc_spr_t *spr = &env->spr_cb[i]; + + if (spr->name && (strcasecmp(name, spr->name) == 0)) { + *pval = env->spr[i]; + return 0; + } + } + + /* Segment registers */ +#if !defined(CONFIG_USER_ONLY) + if ((strncasecmp(name, "sr", 2) == 0) && + ppc_cpu_get_reg_num(name + 2, ARRAY_SIZE(env->sr), ®num)) { + *pval = env->sr[regnum]; + return 0; + } +#endif + + return -EINVAL; +} diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 453509a425..41a7258486 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -9884,7 +9884,7 @@ GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x00000801, PPC_MISC), GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B), #endif GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001FF801, PPC_MISC), -GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000001, PPC_MISC), +GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000000, PPC_MISC), GEN_HANDLER(dcbf, 0x1F, 0x16, 0x02, 0x03C00001, PPC_CACHE), GEN_HANDLER(dcbi, 0x1F, 0x16, 0x0E, 0x03E00001, PPC_CACHE), GEN_HANDLER(dcbst, 0x1F, 0x16, 0x01, 0x03E00001, PPC_CACHE), @@ -11488,6 +11488,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) if (unlikely(cpu_breakpoint_test(cs, ctx.nip, BP_ANY))) { gen_debug_exception(ctxp); + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + ctx.nip += 4; break; } diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 4934c80b8f..e88dc7fc7a 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -305,7 +305,7 @@ static void spr_read_ibat (DisasContext *ctx, int gprn, int sprn) static void spr_read_ibat_h (DisasContext *ctx, int gprn, int sprn) { - tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env, offsetof(CPUPPCState, IBAT[sprn & 1][(sprn - SPR_IBAT4U) / 2])); + tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env, offsetof(CPUPPCState, IBAT[sprn & 1][((sprn - SPR_IBAT4U) / 2) + 4])); } static void spr_write_ibatu (DisasContext *ctx, int sprn, int gprn) diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index c3be180de2..75a0e5d1c3 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -258,7 +258,9 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); - kvm_s390_enable_cmma(s); + if (!mem_path) { + kvm_s390_enable_cmma(s); + } if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) || !kvm_check_extension(s, KVM_CAP_S390_COW)) { diff --git a/target-s390x/translate.c b/target-s390x/translate.c index 05d51fe84a..c79a2cb57a 100644 --- a/target-s390x/translate.c +++ b/target-s390x/translate.c @@ -5360,6 +5360,11 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb) if (unlikely(cpu_breakpoint_test(cs, dc.pc, BP_ANY))) { status = EXIT_PC_STALE; do_debug = true; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc.pc += 2; break; } diff --git a/target-sh4/translate.c b/target-sh4/translate.c index f764bc2539..7bc621649a 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -1855,6 +1855,11 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) tcg_gen_movi_i32(cpu_pc, ctx.pc); gen_helper_debug(cpu_env); ctx.bstate = BS_BRANCH; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + ctx.pc += 2; break; } diff --git a/target-sparc/translate.c b/target-sparc/translate.c index b59742ad2e..41a33193d8 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -5247,6 +5247,7 @@ void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb) tcg_gen_insn_start(dc->pc, dc->npc); } num_insns++; + last_pc = dc->pc; if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) { if (dc->pc != pc_start) { @@ -5262,7 +5263,6 @@ void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb) gen_io_start(); } - last_pc = dc->pc; insn = cpu_ldl_code(env, dc->pc); disas_sparc_insn(dc, insn); diff --git a/target-sparc/vis_helper.c b/target-sparc/vis_helper.c index 383cc8bdff..45fc7dbe81 100644 --- a/target-sparc/vis_helper.c +++ b/target-sparc/vis_helper.c @@ -447,7 +447,7 @@ uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2) for (word = 0; word < 2; word++) { uint32_t val; int32_t src = rs2 >> (word * 32); - int64_t scaled = src << scale; + int64_t scaled = (int64_t)src << scale; int64_t from_fixed = scaled >> 16; val = (from_fixed < -32768 ? -32768 : diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c index 34d45f87b9..354f25a52b 100644 --- a/target-tilegx/translate.c +++ b/target-tilegx/translate.c @@ -2105,38 +2105,44 @@ static TileExcp decode_y2(DisasContext *dc, tilegx_bundle_bits bundle) unsigned srcbdest = get_SrcBDest_Y2(bundle); const char *mnemonic; TCGMemOp memop; + bool prefetch_nofault = false; switch (OEY2(opc, mode)) { case OEY2(LD1S_OPCODE_Y2, MODE_OPCODE_YA2): memop = MO_SB; - mnemonic = "ld1s"; + mnemonic = "ld1s"; /* prefetch_l1_fault */ goto do_load; case OEY2(LD1U_OPCODE_Y2, MODE_OPCODE_YA2): memop = MO_UB; - mnemonic = "ld1u"; + mnemonic = "ld1u"; /* prefetch, prefetch_l1 */ + prefetch_nofault = (srcbdest == TILEGX_R_ZERO); goto do_load; case OEY2(LD2S_OPCODE_Y2, MODE_OPCODE_YA2): memop = MO_TESW; - mnemonic = "ld2s"; + mnemonic = "ld2s"; /* prefetch_l2_fault */ goto do_load; case OEY2(LD2U_OPCODE_Y2, MODE_OPCODE_YA2): memop = MO_TEUW; - mnemonic = "ld2u"; + mnemonic = "ld2u"; /* prefetch_l2 */ + prefetch_nofault = (srcbdest == TILEGX_R_ZERO); goto do_load; case OEY2(LD4S_OPCODE_Y2, MODE_OPCODE_YB2): memop = MO_TESL; - mnemonic = "ld4s"; + mnemonic = "ld4s"; /* prefetch_l3_fault */ goto do_load; case OEY2(LD4U_OPCODE_Y2, MODE_OPCODE_YB2): memop = MO_TEUL; - mnemonic = "ld4u"; + mnemonic = "ld4u"; /* prefetch_l3 */ + prefetch_nofault = (srcbdest == TILEGX_R_ZERO); goto do_load; case OEY2(LD_OPCODE_Y2, MODE_OPCODE_YB2): memop = MO_TEQ; mnemonic = "ld"; do_load: - tcg_gen_qemu_ld_tl(dest_gr(dc, srcbdest), load_gr(dc, srca), - dc->mmuidx, memop); + if (!prefetch_nofault) { + tcg_gen_qemu_ld_tl(dest_gr(dc, srcbdest), load_gr(dc, srca), + dc->mmuidx, memop); + } qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", mnemonic, reg_names[srcbdest], reg_names[srca]); return TILEGX_EXCP_NONE; diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c index 48f89fb1c5..d2f92f02fc 100644 --- a/target-unicore32/translate.c +++ b/target-unicore32/translate.c @@ -1917,9 +1917,11 @@ void gen_intermediate_code(CPUUniCore32State *env, TranslationBlock *tb) gen_set_pc_im(dc->pc); gen_exception(EXCP_DEBUG); dc->is_jmp = DISAS_JUMP; - /* Advance PC so that clearing the breakpoint will - invalidate this TB. */ - dc->pc += 2; /* FIXME */ + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc->pc += 4; goto done_generating; } diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c index aa0c527dc4..06b0163412 100644 --- a/target-xtensa/translate.c +++ b/target-xtensa/translate.c @@ -3088,6 +3088,11 @@ void gen_intermediate_code(CPUXtensaState *env, TranslationBlock *tb) tcg_gen_movi_i32(cpu_pc, dc.pc); gen_exception(&dc, EXCP_DEBUG); dc.is_jmp = DISAS_UPDATE; + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order to for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc.pc += 2; break; } @@ -388,7 +388,11 @@ void tcg_prologue_init(TCGContext *s) /* Compute a high-water mark, at which we voluntarily flush the buffer and start over. The size here is arbitrary, significantly larger than we expect the code generation for any one opcode to require. */ - s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024); + /* ??? We currently have no good estimate for, or checks in, + tcg_out_tb_finalize. If there are quite a lot of guest memory ops, + the number of out-of-line fragments could be quite high. In the + short-term, increase the highwater buffer. */ + s->code_gen_highwater = s->code_gen_buffer + (total_size - 64*1024); tcg_register_jit(s->code_gen_buffer, total_size); @@ -2443,7 +2447,7 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf) one operation beginning below the high water mark cannot overrun the buffer completely. Thus we can test for overflow after generating code without having to check during generation. */ - if (unlikely(s->code_gen_ptr > s->code_gen_highwater)) { + if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { return -1; } } diff --git a/tests/.gitignore b/tests/.gitignore index 65496aa5a6..1e55722b6a 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -9,6 +9,7 @@ check-qom-proplist rcutorture test-aio test-bitops +test-blockjob-txn test-coroutine test-crypto-cipher test-crypto-hash @@ -30,6 +31,7 @@ test-qapi-types.[ch] test-qapi-visit.[ch] test-qdev-global-props test-qemu-opts +test-qga test-qmp-commands test-qmp-commands.h test-qmp-event @@ -44,6 +46,7 @@ test-string-input-visitor test-string-output-visitor test-thread-pool test-throttle +test-timed-average test-visitor-serialization test-vmstate test-write-threshold diff --git a/tests/Makefile b/tests/Makefile index 1c57e39c53..a1d03b457d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -47,6 +47,8 @@ check-unit-y += tests/test-thread-pool$(EXESUF) gcov-files-test-thread-pool-y = thread-pool.c gcov-files-test-hbitmap-y = util/hbitmap.c check-unit-y += tests/test-hbitmap$(EXESUF) +gcov-files-test-hbitmap-y = blockjob.c +check-unit-y += tests/test-blockjob-txn$(EXESUF) check-unit-y += tests/test-x86-cpuid$(EXESUF) # all code tested by test-x86-cpuid is inside topology.h gcov-files-test-x86-cpuid-y = @@ -81,6 +83,7 @@ check-unit-y += tests/test-crypto-cipher$(EXESUF) check-unit-$(CONFIG_GNUTLS) += tests/test-crypto-tlscredsx509$(EXESUF) check-unit-$(CONFIG_GNUTLS) += tests/test-crypto-tlssession$(EXESUF) check-unit-$(CONFIG_LINUX) += tests/test-qga$(EXESUF) +check-unit-y += tests/test-timed-average$(EXESUF) check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -194,8 +197,9 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c check-qtest-i386-y += tests/pc-cpu-test$(EXESUF) check-qtest-i386-y += tests/q35-test$(EXESUF) gcov-files-i386-y += hw/pci-host/q35.c -ifeq ($(CONFIG_VHOST_NET),y) -check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF) +check-qtest-i386-$(CONFIG_VHOST_NET_TEST_i386) += tests/vhost-user-test$(EXESUF) +ifeq ($(CONFIG_VHOST_NET_TEST_i386),) +check-qtest-x86_64-$(CONFIG_VHOST_NET_TEST_x86_64) += tests/vhost-user-test$(EXESUF) endif check-qtest-i386-y += tests/test-netfilter$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) @@ -265,7 +269,6 @@ qapi-schema += enum-dict-member.json qapi-schema += enum-int-member.json qapi-schema += enum-max-member.json qapi-schema += enum-missing-data.json -qapi-schema += enum-union-clash.json qapi-schema += enum-wrong-data.json qapi-schema += escape-outside-string.json qapi-schema += escape-too-big.json @@ -316,12 +319,17 @@ qapi-schema += redefined-builtin.json qapi-schema += redefined-command.json qapi-schema += redefined-event.json qapi-schema += redefined-type.json +qapi-schema += reserved-command-q.json +qapi-schema += reserved-member-has.json +qapi-schema += reserved-member-q.json +qapi-schema += reserved-member-u.json +qapi-schema += reserved-type-kind.json +qapi-schema += reserved-type-list.json qapi-schema += returns-alternate.json qapi-schema += returns-array-bad.json qapi-schema += returns-dict.json qapi-schema += returns-unknown.json qapi-schema += returns-whitelist.json -qapi-schema += struct-base-clash-base.json qapi-schema += struct-base-clash-deep.json qapi-schema += struct-base-clash.json qapi-schema += struct-data-invalid.json @@ -386,6 +394,7 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y) tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y) tests/test-rfifolock$(EXESUF): tests/test-rfifolock.o $(test-util-obj-y) tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y) +tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y) tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y) tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y) tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) @@ -405,6 +414,8 @@ tests/test-vmstate$(EXESUF): tests/test-vmstate.o \ migration/vmstate.o migration/qemu-file.o migration/qemu-file-buf.o \ migration/qemu-file-unix.o qjson.o \ $(test-qom-obj-y) +tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \ + $(test-util-obj-y) tests/test-qapi-types.c tests/test-qapi-types.h :\ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py) @@ -525,6 +536,7 @@ tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) +tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o ifeq ($(CONFIG_POSIX),y) LIBS += -lutil @@ -600,7 +612,7 @@ $(patsubst %, check-%, $(check-unit-y)): check-%: % $(patsubst %, check-report-qtest-%.xml, $(QTEST_TARGETS)): check-report-qtest-%.xml: $(check-qtest-y) $(call quiet-command,QTEST_QEMU_BINARY=$*-softmmu/qemu-system-$* \ QTEST_QEMU_IMG=qemu-img$(EXESUF) \ - gtester -q $(GTESTER_OPTIONS) -o $@ -m=$(SPEED) $(check-qtest-$*-y),"GTESTER $@") + gtester -q $(GTESTER_OPTIONS) -o $@ -m=$(SPEED) $(check-qtest-$*-y) $(check-qtest-generic-y),"GTESTER $@") check-report-unit.xml: $(check-unit-y) $(call quiet-command,gtester -q $(GTESTER_OPTIONS) -o $@ -m=$(SPEED) $^, "GTESTER $@") @@ -648,7 +660,7 @@ check: check-qapi-schema check-unit check-qtest check-clean: $(MAKE) -C tests/tcg clean rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) - rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y))) + rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) clean: check-clean diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 59d387c6d0..088850642e 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -39,15 +39,17 @@ #include "hw/pci/pci_ids.h" #include "hw/pci/pci_regs.h" -/* Test-specific defines -- in MiB */ -#define TEST_IMAGE_SIZE_MB (200 * 1024) -#define TEST_IMAGE_SECTORS ((TEST_IMAGE_SIZE_MB / AHCI_SECTOR_SIZE) \ - * 1024 * 1024) +/* Test images sizes in MB */ +#define TEST_IMAGE_SIZE_MB_LARGE (200 * 1024) +#define TEST_IMAGE_SIZE_MB_SMALL 64 /*** Globals ***/ static char tmp_path[] = "/tmp/qtest.XXXXXX"; static char debug_path[] = "/tmp/qtest-blkdebug.XXXXXX"; +static char mig_socket[] = "/tmp/qtest-migration.XXXXXX"; static bool ahci_pedantic; +static const char *imgfmt; +static unsigned test_image_size_mb; /*** Function Declarations ***/ static void ahci_test_port_spec(AHCIQState *ahci, uint8_t port); @@ -60,6 +62,11 @@ static void ahci_test_pmcap(AHCIQState *ahci, uint8_t offset); /*** Utilities ***/ +static uint64_t mb_to_sectors(uint64_t image_size_mb) +{ + return (image_size_mb * 1024 * 1024) / AHCI_SECTOR_SIZE; +} + static void string_bswap16(uint16_t *s, size_t bytes) { g_assert_cmphex((bytes & 1), ==, 0); @@ -114,8 +121,11 @@ static void ahci_migrate(AHCIQState *from, AHCIQState *to, const char *uri) { QOSState *tmp = to->parent; QPCIDevice *dev = to->dev; + char *uri_local = NULL; + if (uri == NULL) { - uri = "tcp:127.0.0.1:1234"; + uri_local = g_strdup_printf("%s%s", "unix:", mig_socket); + uri = uri_local; } /* context will be 'to' after completion. */ @@ -135,6 +145,7 @@ static void ahci_migrate(AHCIQState *from, AHCIQState *to, const char *uri) from->dev = dev; verify_state(to); + g_free(uri_local); } /*** Test Setup & Teardown ***/ @@ -170,11 +181,11 @@ static AHCIQState *ahci_boot(const char *cli, ...) va_end(ap); } else { cli = "-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s" - ",format=qcow2" + ",format=%s" " -M q35 " "-device ide-hd,drive=drive0 " "-global ide-hd.ver=%s"; - s = ahci_boot(cli, tmp_path, "testdisk", "version"); + s = ahci_boot(cli, tmp_path, "testdisk", imgfmt, "version"); } return s; @@ -900,7 +911,7 @@ static void ahci_test_max(AHCIQState *ahci) uint64_t nsect; uint8_t port; uint8_t cmd; - uint64_t config_sect = TEST_IMAGE_SECTORS - 1; + uint64_t config_sect = mb_to_sectors(test_image_size_mb) - 1; if (config_sect > 0xFFFFFF) { cmd = CMD_READ_MAX_EXT; @@ -1073,12 +1084,12 @@ static void test_flush_retry(void) prepare_blkdebug_script(debug_path, "flush_to_disk"); ahci = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0," - "format=qcow2,cache=writeback," + "format=%s,cache=writeback," "rerror=stop,werror=stop " "-M q35 " "-device ide-hd,drive=drive0 ", debug_path, - tmp_path); + tmp_path, imgfmt); /* Issue Flush Command and wait for error */ port = ahci_port_select(ahci); @@ -1105,18 +1116,19 @@ static void test_flush_retry(void) static void test_migrate_sanity(void) { AHCIQState *src, *dst; - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("unix:%s", mig_socket); src = ahci_boot("-m 1024 -M q35 " - "-hda %s ", tmp_path); + "-drive if=ide,file=%s,format=%s ", tmp_path, imgfmt); dst = ahci_boot("-m 1024 -M q35 " - "-hda %s " - "-incoming %s", tmp_path, uri); + "-drive if=ide,file=%s,format=%s " + "-incoming %s", tmp_path, imgfmt, uri); ahci_migrate(src, dst, uri); ahci_shutdown(src); ahci_shutdown(dst); + g_free(uri); } /** @@ -1129,13 +1141,14 @@ static void ahci_migrate_simple(uint8_t cmd_read, uint8_t cmd_write) size_t bufsize = 4096; unsigned char *tx = g_malloc(bufsize); unsigned char *rx = g_malloc0(bufsize); - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("unix:%s", mig_socket); src = ahci_boot_and_enable("-m 1024 -M q35 " - "-hda %s ", tmp_path); + "-drive if=ide,format=%s,file=%s ", + imgfmt, tmp_path); dst = ahci_boot("-m 1024 -M q35 " - "-hda %s " - "-incoming %s", tmp_path, uri); + "-drive if=ide,format=%s,file=%s " + "-incoming %s", imgfmt, tmp_path, uri); set_context(src->parent); @@ -1158,6 +1171,7 @@ static void ahci_migrate_simple(uint8_t cmd_read, uint8_t cmd_write) ahci_shutdown(dst); g_free(rx); g_free(tx); + g_free(uri); } static void test_migrate_dma(void) @@ -1190,12 +1204,12 @@ static void ahci_halted_io_test(uint8_t cmd_read, uint8_t cmd_write) prepare_blkdebug_script(debug_path, "write_aio"); ahci = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0," - "format=qcow2,cache=writeback," + "format=%s,cache=writeback," "rerror=stop,werror=stop " "-M q35 " "-device ide-hd,drive=drive0 ", debug_path, - tmp_path); + tmp_path, imgfmt); /* Initialize and prepare */ port = ahci_port_select(ahci); @@ -1251,25 +1265,25 @@ static void ahci_migrate_halted_io(uint8_t cmd_read, uint8_t cmd_write) unsigned char *rx = g_malloc0(bufsize); uint64_t ptr; AHCICommand *cmd; - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("unix:%s", mig_socket); prepare_blkdebug_script(debug_path, "write_aio"); src = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0," - "format=qcow2,cache=writeback," + "format=%s,cache=writeback," "rerror=stop,werror=stop " "-M q35 " "-device ide-hd,drive=drive0 ", debug_path, - tmp_path); + tmp_path, imgfmt); dst = ahci_boot("-drive file=%s,if=none,id=drive0," - "format=qcow2,cache=writeback," + "format=%s,cache=writeback," "rerror=stop,werror=stop " "-M q35 " "-device ide-hd,drive=drive0 " "-incoming %s", - tmp_path, uri); + tmp_path, imgfmt, uri); set_context(src->parent); @@ -1301,6 +1315,7 @@ static void ahci_migrate_halted_io(uint8_t cmd_read, uint8_t cmd_write) ahci_shutdown(dst); g_free(rx); g_free(tx); + g_free(uri); } static void test_migrate_halted_dma(void) @@ -1322,20 +1337,22 @@ static void test_flush_migrate(void) AHCICommand *cmd; uint8_t px; const char *s; - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("unix:%s", mig_socket); prepare_blkdebug_script(debug_path, "flush_to_disk"); src = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0," - "cache=writeback,rerror=stop,werror=stop " + "cache=writeback,rerror=stop,werror=stop," + "format=%s " "-M q35 " "-device ide-hd,drive=drive0 ", - debug_path, tmp_path); + debug_path, tmp_path, imgfmt); dst = ahci_boot("-drive file=%s,if=none,id=drive0," - "cache=writeback,rerror=stop,werror=stop " + "cache=writeback,rerror=stop,werror=stop," + "format=%s " "-M q35 " "-device ide-hd,drive=drive0 " - "-incoming %s", tmp_path, uri); + "-incoming %s", tmp_path, imgfmt, uri); set_context(src->parent); @@ -1360,6 +1377,7 @@ static void test_flush_migrate(void) ahci_command_free(cmd); ahci_shutdown(src); ahci_shutdown(dst); + g_free(uri); } static void test_max(void) @@ -1476,7 +1494,7 @@ static uint64_t offset_sector(enum OffsetType ofst, return 1; case OFFSET_HIGH: ceil = (addr_type == ADDR_MODE_LBA28) ? 0xfffffff : 0xffffffffffff; - ceil = MIN(ceil, TEST_IMAGE_SECTORS - 1); + ceil = MIN(ceil, mb_to_sectors(test_image_size_mb) - 1); nsectors = buffsize / AHCI_SECTOR_SIZE; return ceil - nsectors + 1; default: @@ -1558,8 +1576,9 @@ static void create_ahci_io_test(enum IOMode type, enum AddrMode addr, enum BuffLen len, enum OffsetType offset) { char *name; - AHCIIOTestOptions *opts = g_malloc(sizeof(AHCIIOTestOptions)); + AHCIIOTestOptions *opts; + opts = g_malloc(sizeof(AHCIIOTestOptions)); opts->length = len; opts->address_type = addr; opts->io_type = type; @@ -1571,6 +1590,13 @@ static void create_ahci_io_test(enum IOMode type, enum AddrMode addr, buff_len_str[len], offset_str[offset]); + if ((addr == ADDR_MODE_LBA48) && (offset == OFFSET_HIGH) && + (mb_to_sectors(test_image_size_mb) <= 0xFFFFFFF)) { + g_test_message("%s: skipped; test image too small", name); + g_free(name); + return; + } + qtest_add_data_func(name, opts, test_io_interface); g_free(name); } @@ -1617,15 +1643,33 @@ int main(int argc, char **argv) return 0; } - /* Create a temporary qcow2 image */ - close(mkstemp(tmp_path)); - mkqcow2(tmp_path, TEST_IMAGE_SIZE_MB); + /* Create a temporary image */ + fd = mkstemp(tmp_path); + g_assert(fd >= 0); + if (have_qemu_img()) { + imgfmt = "qcow2"; + test_image_size_mb = TEST_IMAGE_SIZE_MB_LARGE; + mkqcow2(tmp_path, TEST_IMAGE_SIZE_MB_LARGE); + } else { + g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; " + "skipping LBA48 high-sector tests"); + imgfmt = "raw"; + test_image_size_mb = TEST_IMAGE_SIZE_MB_SMALL; + ret = ftruncate(fd, test_image_size_mb * 1024 * 1024); + g_assert(ret == 0); + } + close(fd); /* Create temporary blkdebug instructions */ fd = mkstemp(debug_path); g_assert(fd >= 0); close(fd); + /* Reserve a hollow file to use as a socket for migration tests */ + fd = mkstemp(mig_socket); + g_assert(fd >= 0); + close(fd); + /* Run the tests */ qtest_add_func("/ahci/sanity", test_sanity); qtest_add_func("/ahci/pci_spec", test_pci_spec); @@ -1668,6 +1712,7 @@ int main(int argc, char **argv) /* Cleanup */ unlink(tmp_path); unlink(debug_path); + unlink(mig_socket); return ret; } diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 1cfffa5934..61e9bfbabf 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1484,6 +1484,30 @@ static void unterminated_literal(void) g_assert(obj == NULL); } +static char *make_nest(char *buf, size_t cnt) +{ + memset(buf, '[', cnt - 1); + buf[cnt - 1] = '{'; + buf[cnt] = '}'; + memset(buf + cnt + 1, ']', cnt - 1); + buf[2 * cnt] = 0; + return buf; +} + +static void limits_nesting(void) +{ + enum { max_nesting = 1024 }; /* see qobject/json-streamer.c */ + char buf[2 * (max_nesting + 1) + 1]; + QObject *obj; + + obj = qobject_from_json(make_nest(buf, max_nesting)); + g_assert(obj != NULL); + qobject_decref(obj); + + obj = qobject_from_json(make_nest(buf, max_nesting + 1)); + g_assert(obj == NULL); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); @@ -1519,6 +1543,7 @@ int main(int argc, char **argv) g_test_add_func("/errors/invalid_array_comma", invalid_array_comma); g_test_add_func("/errors/invalid_dict_comma", invalid_dict_comma); g_test_add_func("/errors/unterminated/literal", unterminated_literal); + g_test_add_func("/errors/limits/nesting", limits_nesting); return g_test_run(); } diff --git a/tests/check-qom-proplist.c b/tests/check-qom-proplist.c index 7400b1fce9..e674c0fa89 100644 --- a/tests/check-qom-proplist.c +++ b/tests/check-qom-proplist.c @@ -152,6 +152,148 @@ static const TypeInfo dummy_info = { .class_size = sizeof(DummyObjectClass), }; + +/* + * The following 3 object classes are used to + * simulate the kind of relationships seen in + * qdev, which result in complex object + * property destruction ordering. + * + * DummyDev has a 'bus' child to a DummyBus + * DummyBus has a 'backend' child to a DummyBackend + * DummyDev has a 'backend' link to DummyBackend + * + * When DummyDev is finalized, it unparents the + * DummyBackend, which unparents the DummyDev + * which deletes the 'backend' link from DummyDev + * to DummyBackend. This illustrates that the + * object_property_del_all() method needs to + * cope with the list of properties being changed + * while it iterates over them. + */ +typedef struct DummyDev DummyDev; +typedef struct DummyDevClass DummyDevClass; +typedef struct DummyBus DummyBus; +typedef struct DummyBusClass DummyBusClass; +typedef struct DummyBackend DummyBackend; +typedef struct DummyBackendClass DummyBackendClass; + +#define TYPE_DUMMY_DEV "qemu-dummy-dev" +#define TYPE_DUMMY_BUS "qemu-dummy-bus" +#define TYPE_DUMMY_BACKEND "qemu-dummy-backend" + +#define DUMMY_DEV(obj) \ + OBJECT_CHECK(DummyDev, (obj), TYPE_DUMMY_DEV) +#define DUMMY_BUS(obj) \ + OBJECT_CHECK(DummyBus, (obj), TYPE_DUMMY_BUS) +#define DUMMY_BACKEND(obj) \ + OBJECT_CHECK(DummyBackend, (obj), TYPE_DUMMY_BACKEND) + +struct DummyDev { + Object parent_obj; + + DummyBus *bus; +}; + +struct DummyDevClass { + ObjectClass parent_class; +}; + +struct DummyBus { + Object parent_obj; + + DummyBackend *backend; +}; + +struct DummyBusClass { + ObjectClass parent_class; +}; + +struct DummyBackend { + Object parent_obj; +}; + +struct DummyBackendClass { + ObjectClass parent_class; +}; + + +static void dummy_dev_init(Object *obj) +{ + DummyDev *dev = DUMMY_DEV(obj); + DummyBus *bus = DUMMY_BUS(object_new(TYPE_DUMMY_BUS)); + DummyBackend *backend = DUMMY_BACKEND(object_new(TYPE_DUMMY_BACKEND)); + + object_property_add_child(obj, "bus", OBJECT(bus), NULL); + dev->bus = bus; + object_property_add_child(OBJECT(bus), "backend", OBJECT(backend), NULL); + bus->backend = backend; + + object_property_add_link(obj, "backend", TYPE_DUMMY_BACKEND, + (Object **)&bus->backend, NULL, 0, NULL); +} + +static void dummy_dev_unparent(Object *obj) +{ + DummyDev *dev = DUMMY_DEV(obj); + object_unparent(OBJECT(dev->bus)); +} + +static void dummy_dev_class_init(ObjectClass *klass, void *opaque) +{ + klass->unparent = dummy_dev_unparent; +} + + +static void dummy_bus_init(Object *obj) +{ +} + +static void dummy_bus_unparent(Object *obj) +{ + DummyBus *bus = DUMMY_BUS(obj); + object_property_del(obj->parent, "backend", NULL); + object_unparent(OBJECT(bus->backend)); +} + +static void dummy_bus_class_init(ObjectClass *klass, void *opaque) +{ + klass->unparent = dummy_bus_unparent; +} + +static void dummy_backend_init(Object *obj) +{ +} + + +static const TypeInfo dummy_dev_info = { + .name = TYPE_DUMMY_DEV, + .parent = TYPE_OBJECT, + .instance_size = sizeof(DummyDev), + .instance_init = dummy_dev_init, + .class_size = sizeof(DummyDevClass), + .class_init = dummy_dev_class_init, +}; + +static const TypeInfo dummy_bus_info = { + .name = TYPE_DUMMY_BUS, + .parent = TYPE_OBJECT, + .instance_size = sizeof(DummyBus), + .instance_init = dummy_bus_init, + .class_size = sizeof(DummyBusClass), + .class_init = dummy_bus_class_init, +}; + +static const TypeInfo dummy_backend_info = { + .name = TYPE_DUMMY_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(DummyBackend), + .instance_init = dummy_backend_init, + .class_size = sizeof(DummyBackendClass), +}; + + + static void test_dummy_createv(void) { Error *err = NULL; @@ -283,20 +425,83 @@ static void test_dummy_getenum(void) &err); g_assert(err != NULL); error_free(err); + + object_unparent(OBJECT(dobj)); } +static void test_dummy_iterator(void) +{ + Object *parent = object_get_objects_root(); + DummyObject *dobj = DUMMY_OBJECT( + object_new_with_props(TYPE_DUMMY, + parent, + "dummy0", + &error_abort, + "bv", "yes", + "sv", "Hiss hiss hiss", + "av", "platypus", + NULL)); + + ObjectProperty *prop; + ObjectPropertyIterator *iter; + bool seenbv = false, seensv = false, seenav = false, seentype; + + iter = object_property_iter_init(OBJECT(dobj)); + while ((prop = object_property_iter_next(iter))) { + if (g_str_equal(prop->name, "bv")) { + seenbv = true; + } else if (g_str_equal(prop->name, "sv")) { + seensv = true; + } else if (g_str_equal(prop->name, "av")) { + seenav = true; + } else if (g_str_equal(prop->name, "type")) { + /* This prop comes from the base Object class */ + seentype = true; + } else { + g_printerr("Found prop '%s'\n", prop->name); + g_assert_not_reached(); + } + } + object_property_iter_free(iter); + g_assert(seenbv); + g_assert(seenav); + g_assert(seensv); + g_assert(seentype); + + object_unparent(OBJECT(dobj)); +} + + +static void test_dummy_delchild(void) +{ + Object *parent = object_get_objects_root(); + DummyDev *dev = DUMMY_DEV( + object_new_with_props(TYPE_DUMMY_DEV, + parent, + "dev0", + &error_abort, + NULL)); + + object_unparent(OBJECT(dev)); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); module_call_init(MODULE_INIT_QOM); type_register_static(&dummy_info); + type_register_static(&dummy_dev_info); + type_register_static(&dummy_bus_info); + type_register_static(&dummy_backend_info); g_test_add_func("/qom/proplist/createlist", test_dummy_createlist); g_test_add_func("/qom/proplist/createv", test_dummy_createv); g_test_add_func("/qom/proplist/badenum", test_dummy_badenum); g_test_add_func("/qom/proplist/getenum", test_dummy_getenum); + g_test_add_func("/qom/proplist/iterator", test_dummy_iterator); + g_test_add_func("/qom/proplist/delchild", test_dummy_delchild); return g_test_run(); } diff --git a/tests/crypto-tls-x509-helpers.c b/tests/crypto-tls-x509-helpers.c index c5de67baaf..47b4c7ba53 100644 --- a/tests/crypto-tls-x509-helpers.c +++ b/tests/crypto-tls-x509-helpers.c @@ -153,6 +153,7 @@ test_tls_get_ipaddr(const char *addrstr, *datalen = res->ai_addrlen; *data = g_new(char, *datalen); memcpy(*data, res->ai_addr, *datalen); + freeaddrinfo(res); } /* @@ -465,6 +466,7 @@ void test_tls_write_cert_chain(const char *filename, if (!g_file_set_contents(filename, buffer, offset, NULL)) { abort(); } + g_free(buffer); } diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c index d0bc8de25a..7fa170990f 100644 --- a/tests/i440fx-test.c +++ b/tests/i440fx-test.c @@ -191,7 +191,7 @@ static void write_area(uint32_t start, uint32_t end, uint8_t value) uint32_t size = end - start + 1; uint8_t *data; - data = g_malloc0(size); + data = g_malloc(size); memset(data, value, size); memwrite(start, data, size); diff --git a/tests/ide-test.c b/tests/ide-test.c index d1014bbc46..b864701356 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -593,12 +593,12 @@ static void test_flush_nodev(void) ide_test_quit(); } -static void test_pci_retry_flush(const char *machine) +static void test_pci_retry_flush(void) { test_retry_flush("pc"); } -static void test_isa_retry_flush(const char *machine) +static void test_isa_retry_flush(void) { test_retry_flush("isapc"); } @@ -642,15 +642,19 @@ static void nsleep(int64_t nsecs) static uint8_t ide_wait_clear(uint8_t flag) { - int i; uint8_t data; + time_t st; /* Wait with a 5 second timeout */ - for (i = 0; i <= 12500000; i++) { + time(&st); + while (true) { data = inb(IDE_BASE + reg_status); if (!(data & flag)) { return data; } + if (difftime(time(NULL), st) > 5.0) { + break; + } nsleep(400); } g_assert_not_reached(); @@ -658,14 +662,18 @@ static uint8_t ide_wait_clear(uint8_t flag) static void ide_wait_intr(int irq) { - int i; + time_t st; bool intr; - for (i = 0; i <= 12500000; i++) { + time(&st); + while (true) { intr = get_irq(irq); if (intr) { return; } + if (difftime(time(NULL), st) > 5.0) { + break; + } nsleep(400); } @@ -699,24 +707,16 @@ static void cdrom_pio_impl(int nblocks) outb(IDE_BASE + reg_lba_middle, BYTE_COUNT_LIMIT & 0xFF); outb(IDE_BASE + reg_lba_high, (BYTE_COUNT_LIMIT >> 8 & 0xFF)); outb(IDE_BASE + reg_command, CMD_PACKET); - /* HPD0: Check_Status_A State */ + /* HP0: Check_Status_A State */ nsleep(400); data = ide_wait_clear(BSY); - /* HPD1: Send_Packet State */ + /* HP1: Send_Packet State */ assert_bit_set(data, DRQ | DRDY); assert_bit_clear(data, ERR | DF | BSY); /* SCSI CDB (READ10) -- read n*2048 bytes from block 0 */ send_scsi_cdb_read10(0, nblocks); - /* HPD3: INTRQ_Wait */ - ide_wait_intr(IDE_PRIMARY_IRQ); - - /* HPD2: Check_Status_B */ - data = ide_wait_clear(BSY); - assert_bit_set(data, DRQ | DRDY); - assert_bit_clear(data, ERR | DF | BSY); - /* Read data back: occurs in bursts of 'BYTE_COUNT_LIMIT' bytes. * If BYTE_COUNT_LIMIT is odd, we transfer BYTE_COUNT_LIMIT - 1 bytes. * We allow an odd limit only when the remaining transfer size is @@ -728,11 +728,25 @@ static void cdrom_pio_impl(int nblocks) for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) { size_t offset = i * (limit / 2); size_t rem = (rxsize / 2) - offset; + + /* HP3: INTRQ_Wait */ + ide_wait_intr(IDE_PRIMARY_IRQ); + + /* HP2: Check_Status_B (and clear IRQ) */ + data = ide_wait_clear(BSY); + assert_bit_set(data, DRQ | DRDY); + assert_bit_clear(data, ERR | DF | BSY); + + /* HP4: Transfer_Data */ for (j = 0; j < MIN((limit / 2), rem); j++) { rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data)); } - ide_wait_intr(IDE_PRIMARY_IRQ); } + + /* Check for final completion IRQ */ + ide_wait_intr(IDE_PRIMARY_IRQ); + + /* Sanity check final state */ data = ide_wait_clear(DRQ); assert_bit_set(data, DRDY); assert_bit_clear(data, DRQ | ERR | DF | BSY); diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c index c8f0cf0f70..03c7b962a3 100644 --- a/tests/ivshmem-test.c +++ b/tests/ivshmem-test.c @@ -40,6 +40,7 @@ static QPCIDevice *get_device(void) QPCIBus *pcibus; pcibus = qpci_init_pc(); + dev = NULL; qpci_device_foreach(pcibus, 0x1af4, 0x1110, save_fn, &dev); g_assert(dev != NULL); @@ -392,7 +393,7 @@ static void test_ivshmem_memdev(void) /* just for the sake of checking memory-backend property */ setup_vm_cmd(&state, "-object memory-backend-ram,size=1M,id=mb1" - " -device ivshmem,memdev=mb1", false); + " -device ivshmem,x-memdev=mb1", false); qtest_quit(state.qtest); } @@ -478,10 +479,12 @@ int main(int argc, char **argv) tmpserver = g_strconcat(tmpdir, "/server", NULL); qtest_add_func("/ivshmem/single", test_ivshmem_single); - qtest_add_func("/ivshmem/pair", test_ivshmem_pair); - qtest_add_func("/ivshmem/server", test_ivshmem_server); qtest_add_func("/ivshmem/hotplug", test_ivshmem_hotplug); qtest_add_func("/ivshmem/memdev", test_ivshmem_memdev); + if (g_test_slow()) { + qtest_add_func("/ivshmem/pair", test_ivshmem_pair); + qtest_add_func("/ivshmem/server", test_ivshmem_server); + } ret = g_test_run(); diff --git a/tests/libqos/libqos.c b/tests/libqos/libqos.c index 8d7c5a9db8..2d1a802dbe 100644 --- a/tests/libqos/libqos.c +++ b/tests/libqos/libqos.c @@ -147,6 +147,23 @@ void migrate(QOSState *from, QOSState *to, const char *uri) set_context(to); } +bool have_qemu_img(void) +{ + char *rpath; + const char *path = getenv("QTEST_QEMU_IMG"); + if (!path) { + return false; + } + + rpath = realpath(path, NULL); + if (!rpath) { + return false; + } else { + free(rpath); + return true; + } +} + void mkimg(const char *file, const char *fmt, unsigned size_mb) { gchar *cli; @@ -155,13 +172,14 @@ void mkimg(const char *file, const char *fmt, unsigned size_mb) GError *err = NULL; char *qemu_img_path; gchar *out, *out2; - char *abs_path; + char *qemu_img_abs_path; qemu_img_path = getenv("QTEST_QEMU_IMG"); - abs_path = realpath(qemu_img_path, NULL); - assert(qemu_img_path); + g_assert(qemu_img_path); + qemu_img_abs_path = realpath(qemu_img_path, NULL); + g_assert(qemu_img_abs_path); - cli = g_strdup_printf("%s create -f %s %s %uM", abs_path, + cli = g_strdup_printf("%s create -f %s %s %uM", qemu_img_abs_path, fmt, file, size_mb); ret = g_spawn_command_line_sync(cli, &out, &out2, &rc, &err); if (err) { @@ -183,7 +201,7 @@ void mkimg(const char *file, const char *fmt, unsigned size_mb) g_free(out); g_free(out2); g_free(cli); - free(abs_path); + free(qemu_img_abs_path); } void mkqcow2(const char *file, unsigned size_mb) diff --git a/tests/libqos/libqos.h b/tests/libqos/libqos.h index 492a651f5b..ca14d2e9fe 100644 --- a/tests/libqos/libqos.h +++ b/tests/libqos/libqos.h @@ -19,6 +19,7 @@ typedef struct QOSState { QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, va_list ap); QOSState *qtest_boot(QOSOps *ops, const char *cmdline_fmt, ...); void qtest_shutdown(QOSState *qs); +bool have_qemu_img(void); void mkimg(const char *file, const char *fmt, unsigned size_mb); void mkqcow2(const char *file, unsigned size_mb); void set_context(QOSState *s); diff --git a/tests/libqtest.c b/tests/libqtest.c index f6f3d7a950..fa314e1ee7 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -110,6 +110,11 @@ static void kill_qemu(QTestState *s) } } +static void kill_qemu_hook_func(void *s) +{ + kill_qemu(s); +} + static void sigabrt_handler(int signo) { g_hook_list_invoke(&abrt_hooks, FALSE); @@ -133,7 +138,7 @@ static void cleanup_sigabrt_handler(void) sigaction(SIGABRT, &sigact_old, NULL); } -void qtest_add_abrt_handler(void (*fn), const void *data) +void qtest_add_abrt_handler(GHookFunc fn, const void *data) { GHook *hook; @@ -170,7 +175,7 @@ QTestState *qtest_init(const char *extra_args) sock = init_socket(socket_path); qmpsock = init_socket(qmp_socket_path); - qtest_add_abrt_handler(kill_qemu, s); + qtest_add_abrt_handler(kill_qemu_hook_func, s); s->qemu_pid = fork(); if (s->qemu_pid == 0) { @@ -351,7 +356,7 @@ typedef struct { QDict *response; } QMPResponseParser; -static void qmp_response(JSONMessageParser *parser, QList *tokens) +static void qmp_response(JSONMessageParser *parser, GQueue *tokens) { QMPResponseParser *qmp = container_of(parser, QMPResponseParser, parser); QObject *obj; @@ -755,14 +760,15 @@ void qtest_memread(QTestState *s, uint64_t addr, void *data, size_t size) g_strfreev(args); } -void qtest_add_func(const char *str, void (*fn)) +void qtest_add_func(const char *str, void (*fn)(void)) { gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str); g_test_add_func(path, fn); g_free(path); } -void qtest_add_data_func(const char *str, const void *data, void (*fn)) +void qtest_add_data_func(const char *str, const void *data, + void (*fn)(const void *)) { gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str); g_test_add_data_func(path, data, fn); diff --git a/tests/libqtest.h b/tests/libqtest.h index df087452ee..ebdd5bbe53 100644 --- a/tests/libqtest.h +++ b/tests/libqtest.h @@ -416,7 +416,7 @@ const char *qtest_get_arch(void); * The path is prefixed with the architecture under test, as * returned by qtest_get_arch(). */ -void qtest_add_func(const char *str, void (*fn)); +void qtest_add_func(const char *str, void (*fn)(void)); /** * qtest_add_data_func: @@ -428,7 +428,8 @@ void qtest_add_func(const char *str, void (*fn)); * The path is prefixed with the architecture under test, as * returned by qtest_get_arch(). */ -void qtest_add_data_func(const char *str, const void *data, void (*fn)); +void qtest_add_data_func(const char *str, const void *data, + void (*fn)(const void *)); /** * qtest_add: @@ -450,7 +451,7 @@ void qtest_add_data_func(const char *str, const void *data, void (*fn)); g_free(path); \ } while (0) -void qtest_add_abrt_handler(void (*fn), const void *data); +void qtest_add_abrt_handler(GHookFunc fn, const void *data); /** * qtest_start: diff --git a/tests/qapi-schema/enum-union-clash.err b/tests/qapi-schema/enum-union-clash.err deleted file mode 100644 index c04e1a8064..0000000000 --- a/tests/qapi-schema/enum-union-clash.err +++ /dev/null @@ -1 +0,0 @@ -tests/qapi-schema/enum-union-clash.json:2: enum 'UnionKind' should not end in 'Kind' diff --git a/tests/qapi-schema/qapi-schema-test.json b/tests/qapi-schema/qapi-schema-test.json index 4e2d7c2063..44638da948 100644 --- a/tests/qapi-schema/qapi-schema-test.json +++ b/tests/qapi-schema/qapi-schema-test.json @@ -3,6 +3,9 @@ # This file is a stress test of supported qapi constructs that must # parse and compile correctly. +{ 'struct': 'TestStruct', + 'data': { 'integer': 'int', 'boolean': 'bool', 'string': 'str' } } + # for testing enums { 'struct': 'NestedEnumsOne', 'data': { 'enum1': 'EnumOne', # Intentional forward reference @@ -11,6 +14,10 @@ # An empty enum, although unusual, is currently acceptable { 'enum': 'MyEnum', 'data': [ ] } +# Likewise for an empty struct, including an empty base +{ 'struct': 'Empty1', 'data': { } } +{ 'struct': 'Empty2', 'base': 'Empty1', 'data': { } } + # for testing override of default naming heuristic { 'enum': 'QEnumTwo', 'prefix': 'QENUM_TWO', @@ -42,7 +49,8 @@ # dummy struct to force generation of array types not otherwise mentioned { 'struct': 'ForceArrays', - 'data': { 'unused1':['UserDefOne'], 'unused2':['UserDefTwo'] } } + 'data': { 'unused1':['UserDefOne'], 'unused2':['UserDefTwo'], + 'unused3':['TestStruct'] } } # for testing unions # Among other things, test that a name collision between branches does diff --git a/tests/qapi-schema/qapi-schema-test.out b/tests/qapi-schema/qapi-schema-test.out index a6c80e04d7..e20a8239ad 100644 --- a/tests/qapi-schema/qapi-schema-test.out +++ b/tests/qapi-schema/qapi-schema-test.out @@ -81,6 +81,9 @@ event EVENT_A None event EVENT_B None event EVENT_C :obj-EVENT_C-arg event EVENT_D :obj-EVENT_D-arg +object Empty1 +object Empty2 + base Empty1 enum EnumOne ['value1', 'value2', 'value3'] object EventStructOne member struct1: UserDefOne optional=False @@ -89,6 +92,7 @@ object EventStructOne object ForceArrays member unused1: UserDefOneList optional=False member unused2: UserDefTwoList optional=False + member unused3: TestStructList optional=False enum MyEnum [] object NestedEnumsOne member enum1: EnumOne optional=False @@ -97,6 +101,10 @@ object NestedEnumsOne member enum4: EnumOne optional=True enum QEnumTwo ['value1', 'value2'] prefix QENUM_TWO +object TestStruct + member integer: int optional=False + member boolean: bool optional=False + member string: str optional=False object UserDefA member boolean: bool optional=False member a_b: int optional=True diff --git a/tests/qapi-schema/reserved-command-q.err b/tests/qapi-schema/reserved-command-q.err new file mode 100644 index 0000000000..f939e044eb --- /dev/null +++ b/tests/qapi-schema/reserved-command-q.err @@ -0,0 +1 @@ +tests/qapi-schema/reserved-command-q.json:5: 'command' uses invalid name 'q-unix' diff --git a/tests/qapi-schema/enum-union-clash.exit b/tests/qapi-schema/reserved-command-q.exit index d00491fd7e..d00491fd7e 100644 --- a/tests/qapi-schema/enum-union-clash.exit +++ b/tests/qapi-schema/reserved-command-q.exit diff --git a/tests/qapi-schema/reserved-command-q.json b/tests/qapi-schema/reserved-command-q.json new file mode 100644 index 0000000000..99f8aae314 --- /dev/null +++ b/tests/qapi-schema/reserved-command-q.json @@ -0,0 +1,5 @@ +# C entity name collision +# We reject names like 'q-unix', because they can collide with the mangled +# name for 'unix' in generated C. +{ 'command': 'unix' } +{ 'command': 'q-unix' } diff --git a/tests/qapi-schema/enum-union-clash.out b/tests/qapi-schema/reserved-command-q.out index e69de29bb2..e69de29bb2 100644 --- a/tests/qapi-schema/enum-union-clash.out +++ b/tests/qapi-schema/reserved-command-q.out diff --git a/tests/qapi-schema/reserved-member-has.err b/tests/qapi-schema/reserved-member-has.err new file mode 100644 index 0000000000..e755771446 --- /dev/null +++ b/tests/qapi-schema/reserved-member-has.err @@ -0,0 +1 @@ +tests/qapi-schema/reserved-member-has.json:5: Member of 'data' for command 'oops' uses reserved name 'has-a' diff --git a/tests/qapi-schema/reserved-member-has.exit b/tests/qapi-schema/reserved-member-has.exit new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/tests/qapi-schema/reserved-member-has.exit @@ -0,0 +1 @@ +1 diff --git a/tests/qapi-schema/reserved-member-has.json b/tests/qapi-schema/reserved-member-has.json new file mode 100644 index 0000000000..45b9109bdc --- /dev/null +++ b/tests/qapi-schema/reserved-member-has.json @@ -0,0 +1,5 @@ +# C member name collision +# We reject names like 'has-a', because they can collide with the flag +# for an optional 'a' in generated C. +# TODO we could munge the optional flag name to avoid the collision. +{ 'command': 'oops', 'data': { '*a': 'str', 'has-a': 'str' } } diff --git a/tests/qapi-schema/struct-base-clash-base.err b/tests/qapi-schema/reserved-member-has.out index e69de29bb2..e69de29bb2 100644 --- a/tests/qapi-schema/struct-base-clash-base.err +++ b/tests/qapi-schema/reserved-member-has.out diff --git a/tests/qapi-schema/reserved-member-q.err b/tests/qapi-schema/reserved-member-q.err new file mode 100644 index 0000000000..f3d5dd7818 --- /dev/null +++ b/tests/qapi-schema/reserved-member-q.err @@ -0,0 +1 @@ +tests/qapi-schema/reserved-member-q.json:4: Member of 'data' for struct 'Foo' uses invalid name 'q-unix' diff --git a/tests/qapi-schema/reserved-member-q.exit b/tests/qapi-schema/reserved-member-q.exit new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/tests/qapi-schema/reserved-member-q.exit @@ -0,0 +1 @@ +1 diff --git a/tests/qapi-schema/reserved-member-q.json b/tests/qapi-schema/reserved-member-q.json new file mode 100644 index 0000000000..62fed8fddf --- /dev/null +++ b/tests/qapi-schema/reserved-member-q.json @@ -0,0 +1,4 @@ +# C member name collision +# We reject names like 'q-unix', because they can collide with the mangled +# name for 'unix' in generated C. +{ 'struct': 'Foo', 'data': { 'unix':'int', 'q-unix':'bool' } } diff --git a/tests/qapi-schema/reserved-member-q.out b/tests/qapi-schema/reserved-member-q.out new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/qapi-schema/reserved-member-q.out diff --git a/tests/qapi-schema/reserved-member-u.err b/tests/qapi-schema/reserved-member-u.err new file mode 100644 index 0000000000..87d42296cc --- /dev/null +++ b/tests/qapi-schema/reserved-member-u.err @@ -0,0 +1 @@ +tests/qapi-schema/reserved-member-u.json:7: Member of 'data' for struct 'Oops' uses reserved name 'u' diff --git a/tests/qapi-schema/reserved-member-u.exit b/tests/qapi-schema/reserved-member-u.exit new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/tests/qapi-schema/reserved-member-u.exit @@ -0,0 +1 @@ +1 diff --git a/tests/qapi-schema/reserved-member-u.json b/tests/qapi-schema/reserved-member-u.json new file mode 100644 index 0000000000..1eaf0f301c --- /dev/null +++ b/tests/qapi-schema/reserved-member-u.json @@ -0,0 +1,7 @@ +# Potential C member name collision +# We reject use of 'u' as a member name, to allow it for internal use in +# putting union branch members in a separate namespace from QMP members. +# This is true even for non-unions, because it is possible to convert a +# struct to flat union while remaining backwards compatible in QMP. +# TODO - we could munge the member name to 'q_u' to avoid the collision +{ 'struct': 'Oops', 'data': { 'u': 'str' } } diff --git a/tests/qapi-schema/reserved-member-u.out b/tests/qapi-schema/reserved-member-u.out new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/qapi-schema/reserved-member-u.out diff --git a/tests/qapi-schema/reserved-type-kind.err b/tests/qapi-schema/reserved-type-kind.err new file mode 100644 index 0000000000..0a38efaad8 --- /dev/null +++ b/tests/qapi-schema/reserved-type-kind.err @@ -0,0 +1 @@ +tests/qapi-schema/reserved-type-kind.json:2: enum 'UnionKind' should not end in 'Kind' diff --git a/tests/qapi-schema/reserved-type-kind.exit b/tests/qapi-schema/reserved-type-kind.exit new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/tests/qapi-schema/reserved-type-kind.exit @@ -0,0 +1 @@ +1 diff --git a/tests/qapi-schema/enum-union-clash.json b/tests/qapi-schema/reserved-type-kind.json index 593282b6cf..9ecaba12bc 100644 --- a/tests/qapi-schema/enum-union-clash.json +++ b/tests/qapi-schema/reserved-type-kind.json @@ -1,4 +1,2 @@ # we reject types that would conflict with implicit union enum { 'enum': 'UnionKind', 'data': [ 'oops' ] } -{ 'union': 'Union', - 'data': { 'a': 'int' } } diff --git a/tests/qapi-schema/reserved-type-kind.out b/tests/qapi-schema/reserved-type-kind.out new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/qapi-schema/reserved-type-kind.out diff --git a/tests/qapi-schema/reserved-type-list.err b/tests/qapi-schema/reserved-type-list.err new file mode 100644 index 0000000000..4510fa6d90 --- /dev/null +++ b/tests/qapi-schema/reserved-type-list.err @@ -0,0 +1 @@ +tests/qapi-schema/reserved-type-list.json:5: struct 'FooList' should not end in 'List' diff --git a/tests/qapi-schema/reserved-type-list.exit b/tests/qapi-schema/reserved-type-list.exit new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/tests/qapi-schema/reserved-type-list.exit @@ -0,0 +1 @@ +1 diff --git a/tests/qapi-schema/reserved-type-list.json b/tests/qapi-schema/reserved-type-list.json new file mode 100644 index 0000000000..98d53bf808 --- /dev/null +++ b/tests/qapi-schema/reserved-type-list.json @@ -0,0 +1,5 @@ +# Potential C name collision +# We reserve names ending in 'List' for use by array types. +# TODO - we could choose array names to avoid collision with user types, +# in order to let this compile +{ 'struct': 'FooList', 'data': { 's': 'str' } } diff --git a/tests/qapi-schema/reserved-type-list.out b/tests/qapi-schema/reserved-type-list.out new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/qapi-schema/reserved-type-list.out diff --git a/tests/qapi-schema/struct-base-clash-base.exit b/tests/qapi-schema/struct-base-clash-base.exit deleted file mode 100644 index 573541ac97..0000000000 --- a/tests/qapi-schema/struct-base-clash-base.exit +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/qapi-schema/struct-base-clash-base.json b/tests/qapi-schema/struct-base-clash-base.json deleted file mode 100644 index 0c840258c9..0000000000 --- a/tests/qapi-schema/struct-base-clash-base.json +++ /dev/null @@ -1,9 +0,0 @@ -# Struct member 'base' -# FIXME: this parses, but then fails to compile due to a duplicate 'base' -# (one explicit in QMP, the other used to box the base class members). -# We should either reject the collision at parse time, or change the -# generated struct to allow this to compile. -{ 'struct': 'Base', 'data': {} } -{ 'struct': 'Sub', - 'base': 'Base', - 'data': { 'base': 'str' } } diff --git a/tests/qapi-schema/struct-base-clash-base.out b/tests/qapi-schema/struct-base-clash-base.out deleted file mode 100644 index e69a416560..0000000000 --- a/tests/qapi-schema/struct-base-clash-base.out +++ /dev/null @@ -1,5 +0,0 @@ -object :empty -object Base -object Sub - base Base - member base: str optional=False diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 952a524ec7..32469efd76 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -245,6 +245,7 @@ class TestEIO(TestErrors): while not completed: for event in self.vm.get_qmp_events(wait=True): if event['event'] == 'BLOCK_JOB_ERROR': + error = True self.assert_qmp(event, 'data/device', 'drive0') self.assert_qmp(event, 'data/operation', 'read') @@ -257,9 +258,11 @@ class TestEIO(TestErrors): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('query-block-jobs') + if result == {'return': []}: + # Race; likely already finished. Check. + continue self.assert_qmp(result, 'return[0]/paused', False) self.assert_qmp(result, 'return[0]/io-status', 'ok') - error = True elif event['event'] == 'BLOCK_JOB_COMPLETED': self.assertTrue(error, 'job completed unexpectedly') self.assert_qmp(event, 'data/type', 'stream') diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out index 03a31c5943..32c884694c 100644 --- a/tests/qemu-iotests/039.out +++ b/tests/qemu-iotests/039.out @@ -11,7 +11,11 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) +./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +fi ) incompatible_features 0x1 ERROR cluster 5 refcount=0 reference=1 ERROR OFLAG_COPIED data cluster: l2_entry=8000000000050000 refcount=0 @@ -46,7 +50,11 @@ read 512/512 bytes at offset 0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) +./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +fi ) incompatible_features 0x1 ERROR cluster 5 refcount=0 reference=1 Rebuilding refcount structure @@ -60,7 +68,11 @@ incompatible_features 0x0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) +./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +fi ) incompatible_features 0x0 No errors were found on the image. @@ -79,7 +91,11 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) +./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +fi ) incompatible_features 0x1 ERROR cluster 5 refcount=0 reference=1 ERROR OFLAG_COPIED data cluster: l2_entry=8000000000050000 refcount=0 @@ -89,7 +105,11 @@ Data may be corrupted, or further writes to the image may corrupt it. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) +./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +fi ) incompatible_features 0x0 No errors were found on the image. *** done diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index ea2f98e51d..5bdaf3d48d 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -41,6 +41,7 @@ class ImageCommitTestCase(iotests.QMPTestCase): while not completed: for event in self.vm.get_qmp_events(wait=True): if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp_absent(event, 'data/error') self.assert_qmp(event, 'data/type', 'commit') self.assert_qmp(event, 'data/device', 'drive0') self.assert_qmp(event, 'data/offset', event['data']['len']) @@ -251,5 +252,34 @@ class TestSetSpeed(ImageCommitTestCase): class TestActiveZeroLengthImage(TestSingleDrive): image_len = 0 +class TestReopenOverlay(ImageCommitTestCase): + image_len = 1024 * 1024 + img0 = os.path.join(iotests.test_dir, '0.img') + img1 = os.path.join(iotests.test_dir, '1.img') + img2 = os.path.join(iotests.test_dir, '2.img') + img3 = os.path.join(iotests.test_dir, '3.img') + + def setUp(self): + iotests.create_image(self.img0, self.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.img0, self.img1) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.img1, self.img2) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.img2, self.img3) + qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0xab 0 128K', self.img1) + self.vm = iotests.VM().add_drive(self.img3) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(self.img0) + os.remove(self.img1) + os.remove(self.img2) + os.remove(self.img3) + + # This tests what happens when the overlay image of the 'top' node + # needs to be reopened in read-write mode in order to update the + # backing image string. + def test_reopen_overlay(self): + self.run_commit_test(self.img1, self.img0) + if __name__ == '__main__': iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out index 42314e9c00..4fd1c2dcd2 100644 --- a/tests/qemu-iotests/040.out +++ b/tests/qemu-iotests/040.out @@ -1,5 +1,5 @@ -........................ +......................... ---------------------------------------------------------------------- -Ran 24 tests +Ran 25 tests OK diff --git a/tests/qemu-iotests/056 b/tests/qemu-iotests/056 index 54e4bd0692..04f2c3c841 100755 --- a/tests/qemu-iotests/056 +++ b/tests/qemu-iotests/056 @@ -82,6 +82,31 @@ class TestSyncModesNoneAndTop(iotests.QMPTestCase): time.sleep(1) self.assertEqual(-1, qemu_io('-c', 'read -P0x41 0 512', target_img).find("verification failed")) +class TestBeforeWriteNotifier(iotests.QMPTestCase): + def setUp(self): + self.vm = iotests.VM().add_drive_raw("file=blkdebug::null-co://,id=drive0,align=65536,driver=blkdebug") + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(target_img) + + def test_before_write_notifier(self): + self.vm.pause_drive("drive0") + result = self.vm.qmp('drive-backup', device='drive0', + sync='full', target=target_img, + format="file", speed=1) + self.assert_qmp(result, 'return', {}) + result = self.vm.qmp('block-job-pause', device="drive0") + self.assert_qmp(result, 'return', {}) + # Speed is low enough that this must be an uncopied range, which will + # trigger the before write notifier + self.vm.hmp_qemu_io('drive0', 'aio_write -P 1 512512 512') + self.vm.resume_drive("drive0") + result = self.vm.qmp('block-job-resume', device="drive0") + self.assert_qmp(result, 'return', {}) + event = self.cancel_and_wait() + self.assert_qmp(event, 'data/type', 'backup') if __name__ == '__main__': iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/056.out b/tests/qemu-iotests/056.out index fbc63e62f8..8d7e996700 100644 --- a/tests/qemu-iotests/056.out +++ b/tests/qemu-iotests/056.out @@ -1,5 +1,5 @@ -.. +... ---------------------------------------------------------------------- -Ran 2 tests +Ran 3 tests OK diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058 index f2bdd0bffc..63a6598784 100755 --- a/tests/qemu-iotests/058 +++ b/tests/qemu-iotests/058 @@ -32,11 +32,17 @@ status=1 # failure is the default! nbd_unix_socket=$TEST_DIR/test_qemu_nbd_socket nbd_snapshot_img="nbd:unix:$nbd_unix_socket" +rm -f "${TEST_DIR}/qemu-nbd.pid" _cleanup_nbd() { - if [ -n "$NBD_SNAPSHOT_PID" ]; then - kill "$NBD_SNAPSHOT_PID" + local NBD_SNAPSHOT_PID + if [ -f "${TEST_DIR}/qemu-nbd.pid" ]; then + read NBD_SNAPSHOT_PID < "${TEST_DIR}/qemu-nbd.pid" + rm -f "${TEST_DIR}/qemu-nbd.pid" + if [ -n "$NBD_SNAPSHOT_PID" ]; then + kill "$NBD_SNAPSHOT_PID" + fi fi rm -f "$nbd_unix_socket" } @@ -60,7 +66,6 @@ _export_nbd_snapshot() { _cleanup_nbd $QEMU_NBD -v -t -k "$nbd_unix_socket" "$TEST_IMG" -l $1 & - NBD_SNAPSHOT_PID=$! _wait_for_nbd } @@ -68,7 +73,6 @@ _export_nbd_snapshot1() { _cleanup_nbd $QEMU_NBD -v -t -k "$nbd_unix_socket" "$TEST_IMG" -l snapshot.name=$1 & - NBD_SNAPSHOT_PID=$! _wait_for_nbd } diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index b16bea95d2..f2598a8f9d 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -57,7 +57,11 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) +./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +fi ) magic 0x514649fb version 3 backing_file_offset 0x0 @@ -215,7 +219,11 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) +./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +fi ) magic 0x514649fb version 3 backing_file_offset 0x0 diff --git a/tests/qemu-iotests/085 b/tests/qemu-iotests/085 index 56cd6f89b7..aa77eca77d 100755 --- a/tests/qemu-iotests/085 +++ b/tests/qemu-iotests/085 @@ -7,6 +7,7 @@ # snapshots are performed. # # Copyright (C) 2014 Red Hat, Inc. +# Copyright (C) 2015 Igalia, S.L. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -34,17 +35,17 @@ status=1 # failure is the default! snapshot_virt0="snapshot-v0.qcow2" snapshot_virt1="snapshot-v1.qcow2" -MAX_SNAPSHOTS=10 +SNAPSHOTS=10 _cleanup() { _cleanup_qemu - for i in $(seq 1 ${MAX_SNAPSHOTS}) + for i in $(seq 1 ${SNAPSHOTS}) do rm -f "${TEST_DIR}/${i}-${snapshot_virt0}" rm -f "${TEST_DIR}/${i}-${snapshot_virt1}" done - _cleanup_test_img + rm -f "${TEST_IMG}.1" "${TEST_IMG}.2" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -64,7 +65,7 @@ function create_single_snapshot() { cmd="{ 'execute': 'blockdev-snapshot-sync', 'arguments': { 'device': 'virtio0', - 'snapshot-file':'"${TEST_DIR}/${1}-${snapshot_virt0}"', + 'snapshot-file':'${TEST_DIR}/${1}-${snapshot_virt0}', 'format': 'qcow2' } }" _send_qemu_cmd $h "${cmd}" "return" } @@ -76,27 +77,60 @@ function create_group_snapshot() {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', - 'snapshot-file': '"${TEST_DIR}/${1}-${snapshot_virt0}"' } }, + 'snapshot-file': '${TEST_DIR}/${1}-${snapshot_virt0}' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', - 'snapshot-file': '"${TEST_DIR}/${1}-${snapshot_virt1}"' } } ] + 'snapshot-file': '${TEST_DIR}/${1}-${snapshot_virt1}' } } ] } }" _send_qemu_cmd $h "${cmd}" "return" } +# ${1}: unique identifier for the snapshot filename +# ${2}: true: open backing images; false: don't open them (default) +function add_snapshot_image() +{ + if [ "${2}" = "true" ]; then + extra_params="" + else + extra_params="'backing': '', " + fi + base_image="${TEST_DIR}/$((${1}-1))-${snapshot_virt0}" + snapshot_file="${TEST_DIR}/${1}-${snapshot_virt0}" + _make_test_img -b "${base_image}" "$size" + mv "${TEST_IMG}" "${snapshot_file}" + cmd="{ 'execute': 'blockdev-add', 'arguments': + { 'options': + { 'driver': 'qcow2', 'node-name': 'snap_${1}', ${extra_params} + 'file': + { 'driver': 'file', 'filename': '${snapshot_file}', + 'node-name': 'file_${1}' } } } }" + _send_qemu_cmd $h "${cmd}" "return" +} + +# ${1}: unique identifier for the snapshot filename +# ${2}: expected response, defaults to 'return' +function blockdev_snapshot() +{ + cmd="{ 'execute': 'blockdev-snapshot', + 'arguments': { 'node': 'virtio0', + 'overlay':'snap_${1}' } }" + _send_qemu_cmd $h "${cmd}" "${2:-return}" +} + size=128M _make_test_img $size -mv "${TEST_IMG}" "${TEST_IMG}.orig" +mv "${TEST_IMG}" "${TEST_IMG}.1" _make_test_img $size +mv "${TEST_IMG}" "${TEST_IMG}.2" echo echo === Running QEMU === echo qemu_comm_method="qmp" -_launch_qemu -drive file="${TEST_IMG}.orig",if=virtio -drive file="${TEST_IMG}",if=virtio +_launch_qemu -drive file="${TEST_IMG}.1",if=virtio -drive file="${TEST_IMG}.2",if=virtio h=$QEMU_HANDLE echo @@ -105,6 +139,8 @@ echo _send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" "return" +# Tests for the blockdev-snapshot-sync command + echo echo === Create a single snapshot on virtio0 === echo @@ -117,7 +153,7 @@ echo === Invalid command - missing device and nodename === echo _send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot-sync', - 'arguments': { 'snapshot-file':'"${TEST_DIR}/1-${snapshot_virt0}"', + 'arguments': { 'snapshot-file':'${TEST_DIR}/1-${snapshot_virt0}', 'format': 'qcow2' } }" "error" echo @@ -132,11 +168,75 @@ echo echo === Create several transactional group snapshots === echo -for i in $(seq 2 ${MAX_SNAPSHOTS}) +for i in $(seq 2 ${SNAPSHOTS}) do create_group_snapshot ${i} done +# Tests for the blockdev-snapshot command + +echo +echo === Create a couple of snapshots using blockdev-snapshot === +echo + +SNAPSHOTS=$((${SNAPSHOTS}+1)) +add_snapshot_image ${SNAPSHOTS} +blockdev_snapshot ${SNAPSHOTS} + +SNAPSHOTS=$((${SNAPSHOTS}+1)) +add_snapshot_image ${SNAPSHOTS} +blockdev_snapshot ${SNAPSHOTS} + +echo +echo === Invalid command - cannot create a snapshot using a file BDS === +echo + +_send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot', + 'arguments': { 'node':'virtio0', + 'overlay':'file_${SNAPSHOTS}' } + }" "error" + +echo +echo === Invalid command - snapshot node used as active layer === +echo + +blockdev_snapshot ${SNAPSHOTS} error + +_send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot', + 'arguments': { 'node':'virtio0', + 'overlay':'virtio0' } + }" "error" + +_send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot', + 'arguments': { 'node':'virtio0', + 'overlay':'virtio1' } + }" "error" + +echo +echo === Invalid command - snapshot node used as backing hd === +echo + +blockdev_snapshot $((${SNAPSHOTS}-1)) error + +echo +echo === Invalid command - snapshot node has a backing image === +echo + +SNAPSHOTS=$((${SNAPSHOTS}+1)) +add_snapshot_image ${SNAPSHOTS} true +blockdev_snapshot ${SNAPSHOTS} error + +echo +echo === Invalid command - The node does not exist === +echo + +blockdev_snapshot $((${SNAPSHOTS}+1)) error + +_send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot', + 'arguments': { 'node':'nodevice', + 'overlay':'snap_${SNAPSHOTS}' } + }" "error" + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index a6cf19e57a..01c78d6894 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -11,7 +11,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 === Create a single snapshot on virtio0 === -Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/t.qcow2.orig backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/t.qcow2.1 backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} === Invalid command - missing device and nodename === @@ -26,7 +26,7 @@ Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file === Create several transactional group snapshots === Formatting 'TEST_DIR/2-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/1-snapshot-v0.qcow2 backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/2-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/t.qcow2 backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/2-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/t.qcow2.2 backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} Formatting 'TEST_DIR/3-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/2-snapshot-v0.qcow2 backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 Formatting 'TEST_DIR/3-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/2-snapshot-v1.qcow2 backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 @@ -52,4 +52,38 @@ Formatting 'TEST_DIR/9-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file Formatting 'TEST_DIR/10-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/9-snapshot-v0.qcow2 backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 Formatting 'TEST_DIR/10-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file=TEST_DIR/9-snapshot-v1.qcow2 backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} + +=== Create a couple of snapshots using blockdev-snapshot === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/10-snapshot-v0.IMGFMT +{"return": {}} +{"return": {}} +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/11-snapshot-v0.IMGFMT +{"return": {}} +{"return": {}} + +=== Invalid command - cannot create a snapshot using a file BDS === + +{"error": {"class": "GenericError", "desc": "The snapshot does not support backing images"}} + +=== Invalid command - snapshot node used as active layer === + +{"error": {"class": "GenericError", "desc": "The snapshot is already in use by virtio0"}} +{"error": {"class": "GenericError", "desc": "The snapshot is already in use by virtio0"}} +{"error": {"class": "GenericError", "desc": "The snapshot is already in use by virtio1"}} + +=== Invalid command - snapshot node used as backing hd === + +{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}} + +=== Invalid command - snapshot node has a backing image === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/12-snapshot-v0.IMGFMT +{"return": {}} +{"error": {"class": "GenericError", "desc": "The snapshot already has a backing image"}} + +=== Invalid command - The node does not exist === + +{"error": {"class": "GenericError", "desc": "Cannot find device=snap_14 nor node_name=snap_14"}} +{"error": {"class": "GenericError", "desc": "Cannot find device=nodevice nor node_name=nodevice"}} *** done diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118 new file mode 100755 index 0000000000..a2bcd548c1 --- /dev/null +++ b/tests/qemu-iotests/118 @@ -0,0 +1,720 @@ +#!/usr/bin/env python +# +# Test case for the QMP 'change' command and all other associated +# commands +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import stat +import time +import iotests +from iotests import qemu_img + +old_img = os.path.join(iotests.test_dir, 'test0.img') +new_img = os.path.join(iotests.test_dir, 'test1.img') + +class ChangeBaseClass(iotests.QMPTestCase): + has_opened = False + has_closed = False + + def process_events(self): + for event in self.vm.get_qmp_events(wait=False): + if (event['event'] == 'DEVICE_TRAY_MOVED' and + event['data']['device'] == 'drive0'): + if event['data']['tray-open'] == False: + self.has_closed = True + else: + self.has_opened = True + + def wait_for_open(self): + timeout = time.clock() + 3 + while not self.has_opened and time.clock() < timeout: + self.process_events() + if not self.has_opened: + self.fail('Timeout while waiting for the tray to open') + + def wait_for_close(self): + timeout = time.clock() + 3 + while not self.has_closed and time.clock() < timeout: + self.process_events() + if not self.has_opened: + self.fail('Timeout while waiting for the tray to close') + +class GeneralChangeTestsBaseClass(ChangeBaseClass): + def test_change(self): + result = self.vm.qmp('change', device='drive0', target=new_img, + arg=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_blockdev_change_medium(self): + result = self.vm.qmp('blockdev-change-medium', device='drive0', + filename=new_img, + format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_eject(self): + result = self.vm.qmp('eject', device='drive0', force=True) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp_absent(result, 'return[0]/inserted') + + def test_tray_eject_change(self): + result = self.vm.qmp('eject', device='drive0', force=True) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp_absent(result, 'return[0]/inserted') + + result = self.vm.qmp('blockdev-change-medium', device='drive0', + filename=new_img, + format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_tray_open_close(self): + result = self.vm.qmp('blockdev-open-tray', device='drive0', force=True) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + if self.was_empty == True: + self.assert_qmp_absent(result, 'return[0]/inserted') + else: + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-close-tray', device='drive0') + self.assert_qmp(result, 'return', {}) + + if self.has_real_tray or not self.was_empty: + self.wait_for_close() + + result = self.vm.qmp('query-block') + if self.has_real_tray or not self.was_empty: + self.assert_qmp(result, 'return[0]/tray_open', False) + else: + self.assert_qmp(result, 'return[0]/tray_open', True) + if self.was_empty == True: + self.assert_qmp_absent(result, 'return[0]/inserted') + else: + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + def test_tray_eject_close(self): + result = self.vm.qmp('eject', device='drive0', force=True) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp_absent(result, 'return[0]/inserted') + + result = self.vm.qmp('blockdev-close-tray', device='drive0') + self.assert_qmp(result, 'return', {}) + + if self.has_real_tray: + self.wait_for_close() + + result = self.vm.qmp('query-block') + if self.has_real_tray: + self.assert_qmp(result, 'return[0]/tray_open', False) + else: + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp_absent(result, 'return[0]/inserted') + + def test_tray_open_change(self): + result = self.vm.qmp('blockdev-open-tray', device='drive0', force=True) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + if self.was_empty == True: + self.assert_qmp_absent(result, 'return[0]/inserted') + else: + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', device='drive0', + filename=new_img, + format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_cycle(self): + result = self.vm.qmp('blockdev-add', + options={'node-name': 'new', + 'driver': iotests.imgfmt, + 'file': {'filename': new_img, + 'driver': 'file'}}) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('blockdev-open-tray', device='drive0', force=True) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + if self.was_empty == True: + self.assert_qmp_absent(result, 'return[0]/inserted') + else: + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-remove-medium', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp_absent(result, 'return[0]/inserted') + + result = self.vm.qmp('blockdev-insert-medium', device='drive0', + node_name='new') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + result = self.vm.qmp('blockdev-close-tray', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_close_on_closed(self): + result = self.vm.qmp('blockdev-close-tray', device='drive0') + # Should be a no-op + self.assert_qmp(result, 'return', {}) + self.assertEquals(self.vm.get_qmp_events(wait=False), []) + + def test_remove_on_closed(self): + if self.has_opened: + # Empty floppy drive + return + + result = self.vm.qmp('blockdev-remove-medium', device='drive0') + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_insert_on_closed(self): + if self.has_opened: + # Empty floppy drive + return + + result = self.vm.qmp('blockdev-add', + options={'node-name': 'new', + 'driver': iotests.imgfmt, + 'file': {'filename': new_img, + 'driver': 'file'}}) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('blockdev-insert-medium', device='drive0', + node_name='new') + self.assert_qmp(result, 'error/class', 'GenericError') + +class TestInitiallyFilled(GeneralChangeTestsBaseClass): + was_empty = False + + def setUp(self, media, interface): + qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k') + qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') + self.vm = iotests.VM().add_drive(old_img, 'media=%s' % media, interface) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(old_img) + os.remove(new_img) + + def test_insert_on_filled(self): + result = self.vm.qmp('blockdev-add', + options={'node-name': 'new', + 'driver': iotests.imgfmt, + 'file': {'filename': new_img, + 'driver': 'file'}}) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('blockdev-open-tray', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('blockdev-insert-medium', device='drive0', + node_name='new') + self.assert_qmp(result, 'error/class', 'GenericError') + +class TestInitiallyEmpty(GeneralChangeTestsBaseClass): + was_empty = True + + def setUp(self, media, interface): + qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') + self.vm = iotests.VM().add_drive(None, 'media=%s' % media, interface) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(new_img) + + def test_remove_on_empty(self): + result = self.vm.qmp('blockdev-open-tray', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('blockdev-remove-medium', device='drive0') + # Should be a no-op + self.assert_qmp(result, 'return', {}) + +class TestCDInitiallyFilled(TestInitiallyFilled): + TestInitiallyFilled = TestInitiallyFilled + has_real_tray = True + + def setUp(self): + self.TestInitiallyFilled.setUp(self, 'cdrom', 'ide') + +class TestCDInitiallyEmpty(TestInitiallyEmpty): + TestInitiallyEmpty = TestInitiallyEmpty + has_real_tray = True + + def setUp(self): + self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide') + +class TestFloppyInitiallyFilled(TestInitiallyFilled): + TestInitiallyFilled = TestInitiallyFilled + has_real_tray = False + + def setUp(self): + self.TestInitiallyFilled.setUp(self, 'disk', 'floppy') + +class TestFloppyInitiallyEmpty(TestInitiallyEmpty): + TestInitiallyEmpty = TestInitiallyEmpty + has_real_tray = False + + def setUp(self): + self.TestInitiallyEmpty.setUp(self, 'disk', 'floppy') + # FDDs not having a real tray and there not being a medium inside the + # tray at startup means the tray will be considered open + self.has_opened = True + +class TestChangeReadOnly(ChangeBaseClass): + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k') + qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') + self.vm = iotests.VM() + + def tearDown(self): + self.vm.shutdown() + os.chmod(old_img, 0666) + os.chmod(new_img, 0666) + os.remove(old_img) + os.remove(new_img) + + def test_ro_ro_retain(self): + os.chmod(old_img, 0444) + os.chmod(new_img, 0444) + self.vm.add_drive(old_img, 'media=disk,read-only=on', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='retain') + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_ro_rw_retain(self): + os.chmod(old_img, 0444) + self.vm.add_drive(old_img, 'media=disk,read-only=on', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='retain') + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_rw_ro_retain(self): + os.chmod(new_img, 0444) + self.vm.add_drive(old_img, 'media=disk', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='retain') + self.assert_qmp(result, 'error/class', 'GenericError') + + self.assertEquals(self.vm.get_qmp_events(wait=False), []) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + def test_ro_rw(self): + os.chmod(old_img, 0444) + self.vm.add_drive(old_img, 'media=disk,read-only=on', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', + device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='read-write') + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_rw_ro(self): + os.chmod(new_img, 0444) + self.vm.add_drive(old_img, 'media=disk', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', + device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='read-only') + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_make_rw_ro(self): + self.vm.add_drive(old_img, 'media=disk', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', + device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='read-only') + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_make_ro_rw(self): + os.chmod(new_img, 0444) + self.vm.add_drive(old_img, 'media=disk', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', + device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='read-write') + self.assert_qmp(result, 'error/class', 'GenericError') + + self.assertEquals(self.vm.get_qmp_events(wait=False), []) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + def test_make_rw_ro_by_retain(self): + os.chmod(old_img, 0444) + self.vm.add_drive(old_img, 'media=disk,read-only=on', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='retain') + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + def test_make_ro_rw_by_retain(self): + os.chmod(new_img, 0444) + self.vm.add_drive(old_img, 'media=disk', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-change-medium', device='drive0', + filename=new_img, + format=iotests.imgfmt, + read_only_mode='retain') + self.assert_qmp(result, 'error/class', 'GenericError') + + self.assertEquals(self.vm.get_qmp_events(wait=False), []) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + def test_rw_ro_cycle(self): + os.chmod(new_img, 0444) + self.vm.add_drive(old_img, 'media=disk', 'floppy') + self.vm.launch() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-add', + options={'node-name': 'new', + 'driver': iotests.imgfmt, + 'read-only': True, + 'file': {'filename': new_img, + 'driver': 'file'}}) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('blockdev-open-tray', device='drive0', force=True) + self.assert_qmp(result, 'return', {}) + + self.wait_for_open() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp(result, 'return[0]/inserted/ro', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + result = self.vm.qmp('blockdev-remove-medium', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp_absent(result, 'return[0]/inserted') + + result = self.vm.qmp('blockdev-insert-medium', device='drive0', + node_name='new') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', True) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + + result = self.vm.qmp('blockdev-close-tray', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_for_close() + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/ro', True) + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + +GeneralChangeTestsBaseClass = None +TestInitiallyFilled = None +TestInitiallyEmpty = None + + +class TestBlockJobsAfterCycle(ChangeBaseClass): + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, old_img, '1M') + + self.vm = iotests.VM() + self.vm.launch() + + result = self.vm.qmp('blockdev-add', + options={'id': 'drive0', + 'driver': 'null-co'}) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/image/format', 'null-co') + + # For device-less BBs, calling blockdev-open-tray or blockdev-close-tray + # is not necessary + result = self.vm.qmp('blockdev-remove-medium', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block') + self.assert_qmp_absent(result, 'return[0]/inserted') + + result = self.vm.qmp('blockdev-add', + options={'node-name': 'node0', + 'driver': iotests.imgfmt, + 'file': {'filename': old_img, + 'driver': 'file'}}) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('blockdev-insert-medium', device='drive0', + node_name='node0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/tray_open', False) + self.assert_qmp(result, 'return[0]/inserted/image/filename', old_img) + + def tearDown(self): + self.vm.shutdown() + os.remove(old_img) + try: + os.remove(new_img) + except OSError: + pass + + def test_snapshot_and_commit(self): + # We need backing file support + if iotests.imgfmt != 'qcow2' and iotests.imgfmt != 'qed': + return + + result = self.vm.qmp('blockdev-snapshot-sync', device='drive0', + snapshot_file=new_img, + format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/image/filename', new_img) + self.assert_qmp(result, + 'return[0]/inserted/image/backing-image/filename', + old_img) + + result = self.vm.qmp('block-commit', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.vm.event_wait(name='BLOCK_JOB_READY') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + + result = self.vm.qmp('block-job-complete', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.vm.event_wait(name='BLOCK_JOB_COMPLETED') + + +if __name__ == '__main__': + if iotests.qemu_default_machine != 'pc': + # We need floppy and IDE CD-ROM + iotests.notrun('not suitable for this machine type: %s' % + iotests.qemu_default_machine) + iotests.main() diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out new file mode 100644 index 0000000000..6a917130b6 --- /dev/null +++ b/tests/qemu-iotests/118.out @@ -0,0 +1,5 @@ +........................................................... +---------------------------------------------------------------------- +Ran 59 tests + +OK diff --git a/tests/qemu-iotests/119 b/tests/qemu-iotests/119 index 9a11f1b921..cc6ec07705 100755 --- a/tests/qemu-iotests/119 +++ b/tests/qemu-iotests/119 @@ -49,7 +49,7 @@ echo "{'execute': 'qmp_capabilities'} {'execute': 'human-monitor-command', 'arguments': {'command-line': 'qemu-io drv \"read -P 0 0 64k\"'}} {'execute': 'quit'}" \ - | $QEMU -drive id=drv,if=none,file="$TEST_IMG",driver=nbd \ + | $QEMU -nographic -drive id=drv,if=none,file="$TEST_IMG",driver=nbd \ -qmp stdio -nodefaults \ | _filter_qmp | _filter_qemu_io diff --git a/tests/qemu-iotests/120 b/tests/qemu-iotests/120 index 9f13078764..d899a3f527 100755 --- a/tests/qemu-iotests/120 +++ b/tests/qemu-iotests/120 @@ -49,7 +49,7 @@ echo "{'execute': 'qmp_capabilities'} {'execute': 'human-monitor-command', 'arguments': {'command-line': 'qemu-io drv \"write -P 42 0 64k\"'}} {'execute': 'quit'}" \ - | $QEMU -qmp stdio -nodefaults \ + | $QEMU -qmp stdio -nographic -nodefaults \ -drive id=drv,if=none,file="$TEST_IMG",driver=raw,file.driver=$IMGFMT \ | _filter_qmp | _filter_qemu_io $QEMU_IO -c 'read -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124 index 9ccd11809f..c928f0101b 100644 --- a/tests/qemu-iotests/124 +++ b/tests/qemu-iotests/124 @@ -36,6 +36,23 @@ def try_remove(img): pass +def transaction_action(action, **kwargs): + return { + 'type': action, + 'data': dict((k.replace('_', '-'), v) for k, v in kwargs.iteritems()) + } + + +def transaction_bitmap_clear(node, name, **kwargs): + return transaction_action('block-dirty-bitmap-clear', + node=node, name=name, **kwargs) + + +def transaction_drive_backup(device, target, **kwargs): + return transaction_action('drive-backup', device=device, target=target, + **kwargs) + + class Bitmap: def __init__(self, name, drive): self.name = name @@ -122,9 +139,12 @@ class TestIncrementalBackup(iotests.QMPTestCase): def do_qmp_backup(self, error='Input/output error', **kwargs): res = self.vm.qmp('drive-backup', **kwargs) self.assert_qmp(res, 'return', {}) + return self.wait_qmp_backup(kwargs['device'], error) + + def wait_qmp_backup(self, device, error='Input/output error'): event = self.vm.event_wait(name="BLOCK_JOB_COMPLETED", - match={'data': {'device': kwargs['device']}}) + match={'data': {'device': device}}) self.assertNotEqual(event, None) try: @@ -139,6 +159,12 @@ class TestIncrementalBackup(iotests.QMPTestCase): return False + def wait_qmp_backup_cancelled(self, device): + event = self.vm.event_wait(name='BLOCK_JOB_CANCELLED', + match={'data': {'device': device}}) + self.assertNotEqual(event, None) + + def create_anchor_backup(self, drive=None): if drive is None: drive = self.drives[-1] @@ -264,6 +290,43 @@ class TestIncrementalBackup(iotests.QMPTestCase): return self.do_incremental_simple(granularity=131072) + def test_incremental_transaction(self): + '''Test: Verify backups made from transactionally created bitmaps. + + Create a bitmap "before" VM execution begins, then create a second + bitmap AFTER writes have already occurred. Use transactions to create + a full backup and synchronize both bitmaps to this backup. + Create an incremental backup through both bitmaps and verify that + both backups match the current drive0 image. + ''' + + drive0 = self.drives[0] + bitmap0 = self.add_bitmap('bitmap0', drive0) + self.hmp_io_writes(drive0['id'], (('0xab', 0, 512), + ('0xfe', '16M', '256k'), + ('0x64', '32736k', '64k'))) + bitmap1 = self.add_bitmap('bitmap1', drive0) + + result = self.vm.qmp('transaction', actions=[ + transaction_bitmap_clear(bitmap0.drive['id'], bitmap0.name), + transaction_bitmap_clear(bitmap1.drive['id'], bitmap1.name), + transaction_drive_backup(drive0['id'], drive0['backup'], + sync='full', format=drive0['fmt']) + ]) + self.assert_qmp(result, 'return', {}) + self.wait_until_completed(drive0['id']) + self.files.append(drive0['backup']) + + self.hmp_io_writes(drive0['id'], (('0x9a', 0, 512), + ('0x55', '8M', '352k'), + ('0x78', '15872k', '1M'))) + # Both bitmaps should be correctly in sync. + self.create_incremental(bitmap0) + self.create_incremental(bitmap1) + self.vm.shutdown() + self.check_backups() + + def test_incremental_failure(self): '''Test: Verify backups made after a failure are correct. @@ -321,6 +384,123 @@ class TestIncrementalBackup(iotests.QMPTestCase): self.check_backups() + def test_transaction_failure(self): + '''Test: Verify backups made from a transaction that partially fails. + + Add a second drive with its own unique pattern, and add a bitmap to each + drive. Use blkdebug to interfere with the backup on just one drive and + attempt to create a coherent incremental backup across both drives. + + verify a failure in one but not both, then delete the failed stubs and + re-run the same transaction. + + verify that both incrementals are created successfully. + ''' + + # Create a second drive, with pattern: + drive1 = self.add_node('drive1') + self.img_create(drive1['file'], drive1['fmt']) + io_write_patterns(drive1['file'], (('0x14', 0, 512), + ('0x5d', '1M', '32k'), + ('0xcd', '32M', '124k'))) + + # Create a blkdebug interface to this img as 'drive1' + result = self.vm.qmp('blockdev-add', options={ + 'id': drive1['id'], + 'driver': drive1['fmt'], + 'file': { + 'driver': 'blkdebug', + 'image': { + 'driver': 'file', + 'filename': drive1['file'] + }, + 'set-state': [{ + 'event': 'flush_to_disk', + 'state': 1, + 'new_state': 2 + }], + 'inject-error': [{ + 'event': 'read_aio', + 'errno': 5, + 'state': 2, + 'immediately': False, + 'once': True + }], + } + }) + self.assert_qmp(result, 'return', {}) + + # Create bitmaps and full backups for both drives + drive0 = self.drives[0] + dr0bm0 = self.add_bitmap('bitmap0', drive0) + dr1bm0 = self.add_bitmap('bitmap0', drive1) + self.create_anchor_backup(drive0) + self.create_anchor_backup(drive1) + self.assert_no_active_block_jobs() + self.assertFalse(self.vm.get_qmp_events(wait=False)) + + # Emulate some writes + self.hmp_io_writes(drive0['id'], (('0xab', 0, 512), + ('0xfe', '16M', '256k'), + ('0x64', '32736k', '64k'))) + self.hmp_io_writes(drive1['id'], (('0xba', 0, 512), + ('0xef', '16M', '256k'), + ('0x46', '32736k', '64k'))) + + # Create incremental backup targets + target0 = self.prepare_backup(dr0bm0) + target1 = self.prepare_backup(dr1bm0) + + # Ask for a new incremental backup per-each drive, + # expecting drive1's backup to fail: + transaction = [ + transaction_drive_backup(drive0['id'], target0, sync='incremental', + format=drive0['fmt'], mode='existing', + bitmap=dr0bm0.name), + transaction_drive_backup(drive1['id'], target1, sync='incremental', + format=drive1['fmt'], mode='existing', + bitmap=dr1bm0.name) + ] + result = self.vm.qmp('transaction', actions=transaction, + properties={'completion-mode': 'grouped'} ) + self.assert_qmp(result, 'return', {}) + + # Observe that drive0's backup is cancelled and drive1 completes with + # an error. + self.wait_qmp_backup_cancelled(drive0['id']) + self.assertFalse(self.wait_qmp_backup(drive1['id'])) + error = self.vm.event_wait('BLOCK_JOB_ERROR') + self.assert_qmp(error, 'data', {'device': drive1['id'], + 'action': 'report', + 'operation': 'read'}) + self.assertFalse(self.vm.get_qmp_events(wait=False)) + self.assert_no_active_block_jobs() + + # Delete drive0's successful target and eliminate our record of the + # unsuccessful drive1 target. Then re-run the same transaction. + dr0bm0.del_target() + dr1bm0.del_target() + target0 = self.prepare_backup(dr0bm0) + target1 = self.prepare_backup(dr1bm0) + + # Re-run the exact same transaction. + result = self.vm.qmp('transaction', actions=transaction, + properties={'completion-mode':'grouped'}) + self.assert_qmp(result, 'return', {}) + + # Both should complete successfully this time. + self.assertTrue(self.wait_qmp_backup(drive0['id'])) + self.assertTrue(self.wait_qmp_backup(drive1['id'])) + self.make_reference_backup(dr0bm0) + self.make_reference_backup(dr1bm0) + self.assertFalse(self.vm.get_qmp_events(wait=False)) + self.assert_no_active_block_jobs() + + # And the images should of course validate. + self.vm.shutdown() + self.check_backups() + + def test_sync_dirty_bitmap_missing(self): self.assert_no_active_block_jobs() self.files.append(self.err_img) diff --git a/tests/qemu-iotests/124.out b/tests/qemu-iotests/124.out index 2f7d3902f2..dae404e278 100644 --- a/tests/qemu-iotests/124.out +++ b/tests/qemu-iotests/124.out @@ -1,5 +1,5 @@ -....... +......... ---------------------------------------------------------------------- -Ran 7 tests +Ran 9 tests OK diff --git a/tests/qemu-iotests/136 b/tests/qemu-iotests/136 new file mode 100644 index 0000000000..e8c6937fc9 --- /dev/null +++ b/tests/qemu-iotests/136 @@ -0,0 +1,349 @@ +#!/usr/bin/env python +# +# Tests for block device statistics +# +# Copyright (C) 2015 Igalia, S.L. +# Author: Alberto Garcia <berto@igalia.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import iotests +import os + +interval_length = 10 +nsec_per_sec = 1000000000 +op_latency = nsec_per_sec / 1000 # See qtest_latency_ns in accounting.c +bad_sector = 8192 +bad_offset = bad_sector * 512 +blkdebug_file = os.path.join(iotests.test_dir, 'blkdebug.conf') + +class BlockDeviceStatsTestCase(iotests.QMPTestCase): + test_img = "null-aio://" + total_rd_bytes = 0 + total_rd_ops = 0 + total_wr_bytes = 0 + total_wr_ops = 0 + total_wr_merged = 0 + total_flush_ops = 0 + failed_rd_ops = 0 + failed_wr_ops = 0 + invalid_rd_ops = 0 + invalid_wr_ops = 0 + wr_highest_offset = 0 + account_invalid = False + account_failed = False + + def blockstats(self, device): + result = self.vm.qmp("query-blockstats") + for r in result['return']: + if r['device'] == device: + return r['stats'] + raise Exception("Device not found for blockstats: %s" % device) + + def create_blkdebug_file(self): + file = open(blkdebug_file, 'w') + file.write(''' +[inject-error] +event = "read_aio" +errno = "5" +sector = "%d" + +[inject-error] +event = "write_aio" +errno = "5" +sector = "%d" +''' % (bad_sector, bad_sector)) + file.close() + + def setUp(self): + drive_args = [] + drive_args.append("stats-intervals.0=%d" % interval_length) + drive_args.append("stats-account-invalid=%s" % + (self.account_invalid and "on" or "off")) + drive_args.append("stats-account-failed=%s" % + (self.account_failed and "on" or "off")) + self.create_blkdebug_file() + self.vm = iotests.VM().add_drive('blkdebug:%s:%s ' % + (blkdebug_file, self.test_img), + ','.join(drive_args)) + self.vm.launch() + # Set an initial value for the clock + self.vm.qtest("clock_step %d" % nsec_per_sec) + + def tearDown(self): + self.vm.shutdown() + os.remove(blkdebug_file) + + def accounted_ops(self, read = False, write = False, flush = False): + ops = 0 + if write: + ops += self.total_wr_ops + if self.account_failed: + ops += self.failed_wr_ops + if self.account_invalid: + ops += self.invalid_wr_ops + if read: + ops += self.total_rd_ops + if self.account_failed: + ops += self.failed_rd_ops + if self.account_invalid: + ops += self.invalid_rd_ops + if flush: + ops += self.total_flush_ops + return ops + + def accounted_latency(self, read = False, write = False, flush = False): + latency = 0 + if write: + latency += self.total_wr_ops * op_latency + if self.account_failed: + latency += self.failed_wr_ops * op_latency + if read: + latency += self.total_rd_ops * op_latency + if self.account_failed: + latency += self.failed_rd_ops * op_latency + if flush: + latency += self.total_flush_ops * op_latency + return latency + + def check_values(self): + stats = self.blockstats('drive0') + + # Check that the totals match with what we have calculated + self.assertEqual(self.total_rd_bytes, stats['rd_bytes']) + self.assertEqual(self.total_wr_bytes, stats['wr_bytes']) + self.assertEqual(self.total_rd_ops, stats['rd_operations']) + self.assertEqual(self.total_wr_ops, stats['wr_operations']) + self.assertEqual(self.total_flush_ops, stats['flush_operations']) + self.assertEqual(self.wr_highest_offset, stats['wr_highest_offset']) + self.assertEqual(self.failed_rd_ops, stats['failed_rd_operations']) + self.assertEqual(self.failed_wr_ops, stats['failed_wr_operations']) + self.assertEqual(self.invalid_rd_ops, stats['invalid_rd_operations']) + self.assertEqual(self.invalid_wr_ops, stats['invalid_wr_operations']) + self.assertEqual(self.account_invalid, stats['account_invalid']) + self.assertEqual(self.account_failed, stats['account_failed']) + self.assertEqual(self.total_wr_merged, stats['wr_merged']) + + # Check that there's exactly one interval with the length we defined + self.assertEqual(1, len(stats['timed_stats'])) + timed_stats = stats['timed_stats'][0] + self.assertEqual(interval_length, timed_stats['interval_length']) + + total_rd_latency = self.accounted_latency(read = True) + if (total_rd_latency != 0): + self.assertEqual(total_rd_latency, stats['rd_total_time_ns']) + self.assertEqual(op_latency, timed_stats['min_rd_latency_ns']) + self.assertEqual(op_latency, timed_stats['max_rd_latency_ns']) + self.assertEqual(op_latency, timed_stats['avg_rd_latency_ns']) + self.assertLess(0, timed_stats['avg_rd_queue_depth']) + else: + self.assertEqual(0, stats['rd_total_time_ns']) + self.assertEqual(0, timed_stats['min_rd_latency_ns']) + self.assertEqual(0, timed_stats['max_rd_latency_ns']) + self.assertEqual(0, timed_stats['avg_rd_latency_ns']) + self.assertEqual(0, timed_stats['avg_rd_queue_depth']) + + # min read latency <= avg read latency <= max read latency + self.assertLessEqual(timed_stats['min_rd_latency_ns'], + timed_stats['avg_rd_latency_ns']) + self.assertLessEqual(timed_stats['avg_rd_latency_ns'], + timed_stats['max_rd_latency_ns']) + + total_wr_latency = self.accounted_latency(write = True) + if (total_wr_latency != 0): + self.assertEqual(total_wr_latency, stats['wr_total_time_ns']) + self.assertEqual(op_latency, timed_stats['min_wr_latency_ns']) + self.assertEqual(op_latency, timed_stats['max_wr_latency_ns']) + self.assertEqual(op_latency, timed_stats['avg_wr_latency_ns']) + self.assertLess(0, timed_stats['avg_wr_queue_depth']) + else: + self.assertEqual(0, stats['wr_total_time_ns']) + self.assertEqual(0, timed_stats['min_wr_latency_ns']) + self.assertEqual(0, timed_stats['max_wr_latency_ns']) + self.assertEqual(0, timed_stats['avg_wr_latency_ns']) + self.assertEqual(0, timed_stats['avg_wr_queue_depth']) + + # min write latency <= avg write latency <= max write latency + self.assertLessEqual(timed_stats['min_wr_latency_ns'], + timed_stats['avg_wr_latency_ns']) + self.assertLessEqual(timed_stats['avg_wr_latency_ns'], + timed_stats['max_wr_latency_ns']) + + total_flush_latency = self.accounted_latency(flush = True) + if (total_flush_latency != 0): + self.assertEqual(total_flush_latency, stats['flush_total_time_ns']) + self.assertEqual(op_latency, timed_stats['min_flush_latency_ns']) + self.assertEqual(op_latency, timed_stats['max_flush_latency_ns']) + self.assertEqual(op_latency, timed_stats['avg_flush_latency_ns']) + else: + self.assertEqual(0, stats['flush_total_time_ns']) + self.assertEqual(0, timed_stats['min_flush_latency_ns']) + self.assertEqual(0, timed_stats['max_flush_latency_ns']) + self.assertEqual(0, timed_stats['avg_flush_latency_ns']) + + # min flush latency <= avg flush latency <= max flush latency + self.assertLessEqual(timed_stats['min_flush_latency_ns'], + timed_stats['avg_flush_latency_ns']) + self.assertLessEqual(timed_stats['avg_flush_latency_ns'], + timed_stats['max_flush_latency_ns']) + + # idle_time_ns must be > 0 if we have performed any operation + if (self.accounted_ops(read = True, write = True, flush = True) != 0): + self.assertLess(0, stats['idle_time_ns']) + else: + self.assertFalse(stats.has_key('idle_time_ns')) + + # This test does not alter these, so they must be all 0 + self.assertEqual(0, stats['rd_merged']) + self.assertEqual(0, stats['failed_flush_operations']) + self.assertEqual(0, stats['invalid_flush_operations']) + + def do_test_stats(self, rd_size = 0, rd_ops = 0, wr_size = 0, wr_ops = 0, + flush_ops = 0, invalid_rd_ops = 0, invalid_wr_ops = 0, + failed_rd_ops = 0, failed_wr_ops = 0, wr_merged = 0): + # The 'ops' list will contain all the requested I/O operations + ops = [] + for i in range(rd_ops): + ops.append("aio_read %d %d" % (i * rd_size, rd_size)) + + for i in range(wr_ops): + ops.append("aio_write %d %d" % (i * wr_size, wr_size)) + + for i in range(flush_ops): + ops.append("aio_flush") + + highest_offset = wr_ops * wr_size + + # Two types of invalid operations: unaligned length and unaligned offset + for i in range(invalid_rd_ops / 2): + ops.append("aio_read 0 511") + + for i in range(invalid_rd_ops / 2, invalid_rd_ops): + ops.append("aio_read 13 512") + + for i in range(invalid_wr_ops / 2): + ops.append("aio_write 0 511") + + for i in range(invalid_wr_ops / 2, invalid_wr_ops): + ops.append("aio_write 13 512") + + for i in range(failed_rd_ops): + ops.append("aio_read %d 512" % bad_offset) + + for i in range(failed_wr_ops): + ops.append("aio_write %d 512" % bad_offset) + + if failed_wr_ops > 0: + highest_offset = max(highest_offset, bad_offset + 512) + + for i in range(wr_merged): + first = i * wr_size * 2 + second = first + wr_size + ops.append("multiwrite %d %d ; %d %d" % + (first, wr_size, second, wr_size)) + + highest_offset = max(highest_offset, wr_merged * wr_size * 2) + + # Now perform all operations + for op in ops: + self.vm.hmp_qemu_io("drive0", op) + + # Update the expected totals + self.total_rd_bytes += rd_ops * rd_size + self.total_rd_ops += rd_ops + self.total_wr_bytes += wr_ops * wr_size + self.total_wr_ops += wr_ops + self.total_wr_merged += wr_merged + self.total_flush_ops += flush_ops + self.invalid_rd_ops += invalid_rd_ops + self.invalid_wr_ops += invalid_wr_ops + self.failed_rd_ops += failed_rd_ops + self.failed_wr_ops += failed_wr_ops + + self.wr_highest_offset = max(self.wr_highest_offset, highest_offset) + + # Advance the clock so idle_time_ns has a meaningful value + self.vm.qtest("clock_step %d" % nsec_per_sec) + + # And check that the actual statistics match the expected ones + self.check_values() + + def test_read_only(self): + test_values = [[512, 1], + [65536, 1], + [512, 12], + [65536, 12]] + for i in test_values: + self.do_test_stats(rd_size = i[0], rd_ops = i[1]) + + def test_write_only(self): + test_values = [[512, 1], + [65536, 1], + [512, 12], + [65536, 12]] + for i in test_values: + self.do_test_stats(wr_size = i[0], wr_ops = i[1]) + + def test_invalid(self): + self.do_test_stats(invalid_rd_ops = 7) + self.do_test_stats(invalid_wr_ops = 3) + self.do_test_stats(invalid_rd_ops = 4, invalid_wr_ops = 5) + + def test_failed(self): + self.do_test_stats(failed_rd_ops = 8) + self.do_test_stats(failed_wr_ops = 6) + self.do_test_stats(failed_rd_ops = 5, failed_wr_ops = 12) + + def test_flush(self): + self.do_test_stats(flush_ops = 8) + + def test_merged(self): + for i in range(5): + self.do_test_stats(wr_merged = i * 3) + + def test_all(self): + # rd_size, rd_ops, wr_size, wr_ops, flush_ops + # invalid_rd_ops, invalid_wr_ops, + # failed_rd_ops, failed_wr_ops + # wr_merged + test_values = [[512, 1, 512, 1, 1, 4, 7, 5, 2, 1], + [65536, 1, 2048, 12, 7, 7, 5, 2, 5, 5], + [32768, 9, 8192, 1, 4, 3, 2, 4, 6, 4], + [16384, 11, 3584, 16, 9, 8, 6, 7, 3, 4]] + for i in test_values: + self.do_test_stats(*i) + + def test_no_op(self): + # All values must be sane before doing any I/O + self.check_values() + + +class BlockDeviceStatsTestAccountInvalid(BlockDeviceStatsTestCase): + account_invalid = True + account_failed = False + +class BlockDeviceStatsTestAccountFailed(BlockDeviceStatsTestCase): + account_invalid = False + account_failed = True + +class BlockDeviceStatsTestAccountBoth(BlockDeviceStatsTestCase): + account_invalid = True + account_failed = True + +class BlockDeviceStatsTestCoroutine(BlockDeviceStatsTestCase): + test_img = "null-co://" + +if __name__ == '__main__': + iotests.main(supported_fmts=["raw"]) diff --git a/tests/qemu-iotests/136.out b/tests/qemu-iotests/136.out new file mode 100644 index 0000000000..0a5e9583a4 --- /dev/null +++ b/tests/qemu-iotests/136.out @@ -0,0 +1,5 @@ +........................................ +---------------------------------------------------------------------- +Ran 40 tests + +OK diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out index cf55a41d8a..88c702cf77 100644 --- a/tests/qemu-iotests/137.out +++ b/tests/qemu-iotests/137.out @@ -31,7 +31,11 @@ Cache clean interval too big Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -./common.config: Killed ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" ) +./common.config: Killed ( if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@"; +fi ) incompatible_features 0x0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 65536/65536 bytes at offset 0 diff --git a/tests/qemu-iotests/139 b/tests/qemu-iotests/139 new file mode 100644 index 0000000000..42f78c7baa --- /dev/null +++ b/tests/qemu-iotests/139 @@ -0,0 +1,416 @@ +#!/usr/bin/env python +# +# Test cases for the QMP 'x-blockdev-del' command +# +# Copyright (C) 2015 Igalia, S.L. +# Author: Alberto Garcia <berto@igalia.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import iotests +import time + +base_img = os.path.join(iotests.test_dir, 'base.img') +new_img = os.path.join(iotests.test_dir, 'new.img') + +class TestBlockdevDel(iotests.QMPTestCase): + + def setUp(self): + iotests.qemu_img('create', '-f', iotests.imgfmt, base_img, '1M') + self.vm = iotests.VM() + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(base_img) + if os.path.isfile(new_img): + os.remove(new_img) + + # Check whether a BlockBackend exists + def checkBlockBackend(self, backend, node, must_exist = True): + result = self.vm.qmp('query-block') + backends = filter(lambda x: x['device'] == backend, result['return']) + self.assertLessEqual(len(backends), 1) + self.assertEqual(must_exist, len(backends) == 1) + if must_exist: + if node: + self.assertEqual(backends[0]['inserted']['node-name'], node) + else: + self.assertFalse(backends[0].has_key('inserted')) + + # Check whether a BlockDriverState exists + def checkBlockDriverState(self, node, must_exist = True): + result = self.vm.qmp('query-named-block-nodes') + nodes = filter(lambda x: x['node-name'] == node, result['return']) + self.assertLessEqual(len(nodes), 1) + self.assertEqual(must_exist, len(nodes) == 1) + + # Add a new BlockBackend (with its attached BlockDriverState) + def addBlockBackend(self, backend, node): + file_node = '%s_file' % node + self.checkBlockBackend(backend, node, False) + self.checkBlockDriverState(node, False) + self.checkBlockDriverState(file_node, False) + opts = {'driver': iotests.imgfmt, + 'id': backend, + 'node-name': node, + 'file': {'driver': 'file', + 'node-name': file_node, + 'filename': base_img}} + result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts) + self.assert_qmp(result, 'return', {}) + self.checkBlockBackend(backend, node) + self.checkBlockDriverState(node) + self.checkBlockDriverState(file_node) + + # Add a BlockDriverState without a BlockBackend + def addBlockDriverState(self, node): + file_node = '%s_file' % node + self.checkBlockDriverState(node, False) + self.checkBlockDriverState(file_node, False) + opts = {'driver': iotests.imgfmt, + 'node-name': node, + 'file': {'driver': 'file', + 'node-name': file_node, + 'filename': base_img}} + result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts) + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(node) + self.checkBlockDriverState(file_node) + + # Add a BlockDriverState that will be used as overlay for the base_img BDS + def addBlockDriverStateOverlay(self, node): + self.checkBlockDriverState(node, False) + iotests.qemu_img('create', '-f', iotests.imgfmt, + '-b', base_img, new_img, '1M') + opts = {'driver': iotests.imgfmt, + 'node-name': node, + 'backing': '', + 'file': {'driver': 'file', + 'filename': new_img}} + result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts) + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(node) + + # Delete a BlockBackend + def delBlockBackend(self, backend, node, expect_error = False, + destroys_media = True): + self.checkBlockBackend(backend, node) + if node: + self.checkBlockDriverState(node) + result = self.vm.qmp('x-blockdev-del', id = backend) + if expect_error: + self.assert_qmp(result, 'error/class', 'GenericError') + if node: + self.checkBlockDriverState(node) + else: + self.assert_qmp(result, 'return', {}) + if node: + self.checkBlockDriverState(node, not destroys_media) + self.checkBlockBackend(backend, node, must_exist = expect_error) + + # Delete a BlockDriverState + def delBlockDriverState(self, node, expect_error = False): + self.checkBlockDriverState(node) + result = self.vm.qmp('x-blockdev-del', node_name = node) + if expect_error: + self.assert_qmp(result, 'error/class', 'GenericError') + else: + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(node, expect_error) + + # Add a device model + def addDeviceModel(self, device, backend): + result = self.vm.qmp('device_add', id = device, + driver = 'virtio-blk-pci', drive = backend) + self.assert_qmp(result, 'return', {}) + + # Delete a device model + def delDeviceModel(self, device): + result = self.vm.qmp('device_del', id = device) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('system_reset') + self.assert_qmp(result, 'return', {}) + + device_path = '/machine/peripheral/%s/virtio-backend' % device + event = self.vm.event_wait(name="DEVICE_DELETED", + match={'data': {'path': device_path}}) + self.assertNotEqual(event, None) + + event = self.vm.event_wait(name="DEVICE_DELETED", + match={'data': {'device': device}}) + self.assertNotEqual(event, None) + + # Remove a BlockDriverState + def ejectDrive(self, backend, node, expect_error = False, + destroys_media = True): + self.checkBlockBackend(backend, node) + self.checkBlockDriverState(node) + result = self.vm.qmp('eject', device = backend) + if expect_error: + self.assert_qmp(result, 'error/class', 'GenericError') + self.checkBlockDriverState(node) + self.checkBlockBackend(backend, node) + else: + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(node, not destroys_media) + self.checkBlockBackend(backend, None) + + # Insert a BlockDriverState + def insertDrive(self, backend, node): + self.checkBlockBackend(backend, None) + self.checkBlockDriverState(node) + result = self.vm.qmp('blockdev-insert-medium', + device = backend, node_name = node) + self.assert_qmp(result, 'return', {}) + self.checkBlockBackend(backend, node) + self.checkBlockDriverState(node) + + # Create a snapshot using 'blockdev-snapshot-sync' + def createSnapshotSync(self, node, overlay): + self.checkBlockDriverState(node) + self.checkBlockDriverState(overlay, False) + opts = {'node-name': node, + 'snapshot-file': new_img, + 'snapshot-node-name': overlay, + 'format': iotests.imgfmt} + result = self.vm.qmp('blockdev-snapshot-sync', conv_keys=False, **opts) + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(node) + self.checkBlockDriverState(overlay) + + # Create a snapshot using 'blockdev-snapshot' + def createSnapshot(self, node, overlay): + self.checkBlockDriverState(node) + self.checkBlockDriverState(overlay) + result = self.vm.qmp('blockdev-snapshot', + node = node, overlay = overlay) + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(node) + self.checkBlockDriverState(overlay) + + # Create a mirror + def createMirror(self, backend, node, new_node): + self.checkBlockBackend(backend, node) + self.checkBlockDriverState(new_node, False) + opts = {'device': backend, + 'target': new_img, + 'node-name': new_node, + 'sync': 'top', + 'format': iotests.imgfmt} + result = self.vm.qmp('drive-mirror', conv_keys=False, **opts) + self.assert_qmp(result, 'return', {}) + self.checkBlockBackend(backend, node) + self.checkBlockDriverState(new_node) + + # Complete an existing block job + def completeBlockJob(self, backend, node_before, node_after): + self.checkBlockBackend(backend, node_before) + result = self.vm.qmp('block-job-complete', device=backend) + self.assert_qmp(result, 'return', {}) + self.wait_until_completed(backend) + self.checkBlockBackend(backend, node_after) + + # Add a BlkDebug node + # Note that the purpose of this is to test the x-blockdev-del + # sanity checks, not to create a usable blkdebug drive + def addBlkDebug(self, debug, node): + self.checkBlockDriverState(node, False) + self.checkBlockDriverState(debug, False) + image = {'driver': iotests.imgfmt, + 'node-name': node, + 'file': {'driver': 'file', + 'filename': base_img}} + opts = {'driver': 'blkdebug', + 'node-name': debug, + 'image': image} + result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts) + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(node) + self.checkBlockDriverState(debug) + + # Add a BlkVerify node + # Note that the purpose of this is to test the x-blockdev-del + # sanity checks, not to create a usable blkverify drive + def addBlkVerify(self, blkverify, test, raw): + self.checkBlockDriverState(test, False) + self.checkBlockDriverState(raw, False) + self.checkBlockDriverState(blkverify, False) + iotests.qemu_img('create', '-f', iotests.imgfmt, new_img, '1M') + node_0 = {'driver': iotests.imgfmt, + 'node-name': test, + 'file': {'driver': 'file', + 'filename': base_img}} + node_1 = {'driver': iotests.imgfmt, + 'node-name': raw, + 'file': {'driver': 'file', + 'filename': new_img}} + opts = {'driver': 'blkverify', + 'node-name': blkverify, + 'test': node_0, + 'raw': node_1} + result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts) + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(test) + self.checkBlockDriverState(raw) + self.checkBlockDriverState(blkverify) + + # Add a Quorum node + def addQuorum(self, quorum, child0, child1): + self.checkBlockDriverState(child0, False) + self.checkBlockDriverState(child1, False) + self.checkBlockDriverState(quorum, False) + iotests.qemu_img('create', '-f', iotests.imgfmt, new_img, '1M') + child_0 = {'driver': iotests.imgfmt, + 'node-name': child0, + 'file': {'driver': 'file', + 'filename': base_img}} + child_1 = {'driver': iotests.imgfmt, + 'node-name': child1, + 'file': {'driver': 'file', + 'filename': new_img}} + opts = {'driver': 'quorum', + 'node-name': quorum, + 'vote-threshold': 1, + 'children': [ child_0, child_1 ]} + result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts) + self.assert_qmp(result, 'return', {}) + self.checkBlockDriverState(child0) + self.checkBlockDriverState(child1) + self.checkBlockDriverState(quorum) + + ######################## + # The tests start here # + ######################## + + def testWrongParameters(self): + self.addBlockBackend('drive0', 'node0') + result = self.vm.qmp('x-blockdev-del') + self.assert_qmp(result, 'error/class', 'GenericError') + result = self.vm.qmp('x-blockdev-del', id='drive0', node_name='node0') + self.assert_qmp(result, 'error/class', 'GenericError') + self.delBlockBackend('drive0', 'node0') + + def testBlockBackend(self): + self.addBlockBackend('drive0', 'node0') + # You cannot delete a BDS that is attached to a backend + self.delBlockDriverState('node0', expect_error = True) + self.delBlockBackend('drive0', 'node0') + + def testBlockDriverState(self): + self.addBlockDriverState('node0') + # You cannot delete a file BDS directly + self.delBlockDriverState('node0_file', expect_error = True) + self.delBlockDriverState('node0') + + def testEject(self): + self.addBlockBackend('drive0', 'node0') + self.ejectDrive('drive0', 'node0') + self.delBlockBackend('drive0', None) + + def testDeviceModel(self): + self.addBlockBackend('drive0', 'node0') + self.addDeviceModel('device0', 'drive0') + self.ejectDrive('drive0', 'node0', expect_error = True) + self.delBlockBackend('drive0', 'node0', expect_error = True) + self.delDeviceModel('device0') + self.delBlockBackend('drive0', 'node0') + + def testAttachMedia(self): + # This creates a BlockBackend and removes its media + self.addBlockBackend('drive0', 'node0') + self.ejectDrive('drive0', 'node0') + # This creates a new BlockDriverState and inserts it into the backend + self.addBlockDriverState('node1') + self.insertDrive('drive0', 'node1') + # The backend can't be removed: the new BDS has an extra reference + self.delBlockBackend('drive0', 'node1', expect_error = True) + self.delBlockDriverState('node1', expect_error = True) + # The BDS still exists after being ejected, but now it can be removed + self.ejectDrive('drive0', 'node1', destroys_media = False) + self.delBlockDriverState('node1') + self.delBlockBackend('drive0', None) + + def testSnapshotSync(self): + self.addBlockBackend('drive0', 'node0') + self.createSnapshotSync('node0', 'overlay0') + # This fails because node0 is now being used as a backing image + self.delBlockDriverState('node0', expect_error = True) + # This succeeds because overlay0 only has the backend reference + self.delBlockBackend('drive0', 'overlay0') + self.checkBlockDriverState('node0', False) + + def testSnapshot(self): + self.addBlockBackend('drive0', 'node0') + self.addBlockDriverStateOverlay('overlay0') + self.createSnapshot('node0', 'overlay0') + self.delBlockBackend('drive0', 'overlay0', expect_error = True) + self.delBlockDriverState('node0', expect_error = True) + self.delBlockDriverState('overlay0', expect_error = True) + self.ejectDrive('drive0', 'overlay0', destroys_media = False) + self.delBlockBackend('drive0', None) + self.delBlockDriverState('node0', expect_error = True) + self.delBlockDriverState('overlay0') + self.checkBlockDriverState('node0', False) + + def testMirror(self): + self.addBlockBackend('drive0', 'node0') + self.createMirror('drive0', 'node0', 'mirror0') + # The block job prevents removing the device + self.delBlockBackend('drive0', 'node0', expect_error = True) + self.delBlockDriverState('node0', expect_error = True) + self.delBlockDriverState('mirror0', expect_error = True) + self.wait_ready('drive0') + self.completeBlockJob('drive0', 'node0', 'mirror0') + self.assert_no_active_block_jobs() + self.checkBlockDriverState('node0', False) + # This succeeds because the backend now points to mirror0 + self.delBlockBackend('drive0', 'mirror0') + + def testBlkDebug(self): + self.addBlkDebug('debug0', 'node0') + # 'node0' is used by the blkdebug node + self.delBlockDriverState('node0', expect_error = True) + # But we can remove the blkdebug node directly + self.delBlockDriverState('debug0') + self.checkBlockDriverState('node0', False) + + def testBlkVerify(self): + self.addBlkVerify('verify0', 'node0', 'node1') + # We cannot remove the children of a blkverify device + self.delBlockDriverState('node0', expect_error = True) + self.delBlockDriverState('node1', expect_error = True) + # But we can remove the blkverify node directly + self.delBlockDriverState('verify0') + self.checkBlockDriverState('node0', False) + self.checkBlockDriverState('node1', False) + + def testQuorum(self): + if not 'quorum' in iotests.qemu_img_pipe('--help'): + return + self.addQuorum('quorum0', 'node0', 'node1') + # We cannot remove the children of a Quorum device + self.delBlockDriverState('node0', expect_error = True) + self.delBlockDriverState('node1', expect_error = True) + # But we can remove the Quorum node directly + self.delBlockDriverState('quorum0') + self.checkBlockDriverState('node0', False) + self.checkBlockDriverState('node1', False) + + +if __name__ == '__main__': + iotests.main(supported_fmts=["qcow2"]) diff --git a/tests/qemu-iotests/139.out b/tests/qemu-iotests/139.out new file mode 100644 index 0000000000..281b69efea --- /dev/null +++ b/tests/qemu-iotests/139.out @@ -0,0 +1,5 @@ +............ +---------------------------------------------------------------------- +Ran 12 tests + +OK diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 25c351bbd1..ff84f4b0d6 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -41,7 +41,6 @@ sortme=false expunge=true have_test_arg=false randomize=false -valgrind=false cachemode=false rm -f $tmp.list $tmp.tmp $tmp.sed @@ -53,6 +52,7 @@ export CACHEMODE="writeback" export QEMU_IO_OPTIONS="" export CACHEMODE_IS_DEFAULT=true export QEMU_OPTIONS="-nodefaults" +export VALGRIND_QEMU= for r do @@ -278,7 +278,7 @@ testlist options ;; -valgrind) - valgrind=true + VALGRIND_QEMU='y' xpand=false ;; @@ -436,8 +436,3 @@ fi if [ "$IMGPROTO" = "nbd" ] ; then [ "$QEMU_NBD" = "" ] && _fatal "qemu-nbd not found" fi - -if $valgrind; then - export REAL_QEMU_IO="$QEMU_IO_PROG" - export QEMU_IO_PROG=valgrind_qemu_io -fi diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index 596bb2b1e5..3ed51b8baa 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -44,6 +44,8 @@ export HOST_OPTIONS=${HOST_OPTIONS:=local.config} export CHECK_OPTIONS=${CHECK_OPTIONS:="-g auto"} export PWD=`pwd` +export _QEMU_HANDLE=0 + # $1 = prog to look for, $2* = default pathnames if not found in $PATH set_prog_path() { @@ -105,7 +107,12 @@ fi _qemu_wrapper() { - (exec "$QEMU_PROG" $QEMU_OPTIONS "$@") + ( + if [ -n "${QEMU_NEED_PID}" ]; then + echo $BASHPID > "${TEST_DIR}/qemu-${_QEMU_HANDLE}.pid" + fi + exec "$QEMU_PROG" $QEMU_OPTIONS "$@" + ) } _qemu_img_wrapper() @@ -115,12 +122,31 @@ _qemu_img_wrapper() _qemu_io_wrapper() { - (exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@") + local VALGRIND_LOGFILE=/tmp/$$.valgrind + local RETVAL + ( + if [ "${VALGRIND_QEMU}" == "y" ]; then + exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" + else + exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" + fi + ) + RETVAL=$? + if [ "${VALGRIND_QEMU}" == "y" ]; then + if [ $RETVAL == 99 ]; then + cat "${VALGRIND_LOGFILE}" + fi + rm -f "${VALGRIND_LOGFILE}" + fi + (exit $RETVAL) } _qemu_nbd_wrapper() { - (exec "$QEMU_NBD_PROG" $QEMU_NBD_OPTIONS "$@") + ( + echo $BASHPID > "${TEST_DIR}/qemu-nbd.pid" + exec "$QEMU_NBD_PROG" $QEMU_NBD_OPTIONS "$@" + ) } export QEMU=_qemu_wrapper diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index e3faa53d22..8bf3969418 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -30,8 +30,6 @@ QEMU_COMM_TIMEOUT=10 QEMU_FIFO_IN="${TEST_DIR}/qmp-in-$$" QEMU_FIFO_OUT="${TEST_DIR}/qmp-out-$$" -QEMU_PID= -_QEMU_HANDLE=0 QEMU_HANDLE=0 # If bash version is >= 4.1, these will be overwritten and dynamic @@ -153,11 +151,11 @@ function _launch_qemu() mkfifo "${fifo_out}" mkfifo "${fifo_in}" + QEMU_NEED_PID='y'\ ${QEMU} -nographic -serial none ${comm} -machine accel=qtest "${@}" \ >"${fifo_out}" \ 2>&1 \ <"${fifo_in}" & - QEMU_PID[${_QEMU_HANDLE}]=$! if [[ "${BASH_VERSINFO[0]}" -ge "5" || ("${BASH_VERSINFO[0]}" -ge "4" && "${BASH_VERSINFO[1]}" -ge "1") ]] @@ -196,10 +194,18 @@ function _cleanup_qemu() # QEMU_PID[], QEMU_IN[], QEMU_OUT[] all use same indices for i in "${!QEMU_OUT[@]}" do - if [ -z "${wait}" ]; then - kill -KILL ${QEMU_PID[$i]} 2>/dev/null + local QEMU_PID + if [ -f "${TEST_DIR}/qemu-${i}.pid" ]; then + read QEMU_PID < "${TEST_DIR}/qemu-${i}.pid" + rm -f "${TEST_DIR}/qemu-${i}.pid" + if [ -z "${wait}" ] && [ -n "${QEMU_PID}" ]; then + kill -KILL ${QEMU_PID} 2>/dev/null + fi + if [ -n "${QEMU_PID}" ]; then + wait ${QEMU_PID} 2>/dev/null # silent kill + fi fi - wait ${QEMU_PID[$i]} 2>/dev/null # silent kill + if [ -n "${wait}" ]; then cat <&${QEMU_OUT[$i]} | _filter_testdir | _filter_qemu \ | _filter_qemu_io | _filter_qmp diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 28e4beac15..d9913f8496 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -70,16 +70,6 @@ else TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT fi -function valgrind_qemu_io() -{ - valgrind --log-file=/tmp/$$.valgrind --error-exitcode=99 $REAL_QEMU_IO "$@" - if [ $? != 0 ]; then - cat /tmp/$$.valgrind - fi - rm -f /tmp/$$.valgrind -} - - _optstr_add() { if [ -n "$1" ]; then @@ -154,7 +144,6 @@ _make_test_img() # Start an NBD server on the image file, which is what we'll be talking to if [ $IMGPROTO = "nbd" ]; then eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT $TEST_IMG_FILE &" - QEMU_NBD_PID=$! sleep 1 # FIXME: qemu-nbd needs to be listening before we continue fi } @@ -175,8 +164,11 @@ _cleanup_test_img() case "$IMGPROTO" in nbd) - if [ -n "$QEMU_NBD_PID" ]; then - kill $QEMU_NBD_PID + if [ -f "${TEST_DIR}/qemu-nbd.pid" ]; then + local QEMU_NBD_PID + read QEMU_NBD_PID < "${TEST_DIR}/qemu-nbd.pid" + kill ${QEMU_NBD_PID} + rm -f "${TEST_DIR}/qemu-nbd.pid" fi rm -f "$TEST_IMG_FILE" ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 30c784e940..5a0880893a 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -122,6 +122,7 @@ 114 rw auto quick 115 rw auto 116 rw auto quick +118 rw auto 119 rw auto quick 120 rw auto quick 121 rw auto @@ -135,5 +136,7 @@ 132 rw auto quick 134 rw auto quick 135 rw auto +136 rw auto 137 rw auto 138 rw auto quick +139 rw auto quick diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index ff5905f379..e02245ed07 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -140,6 +140,11 @@ class VM(object): self._args.append('-monitor') self._args.append(args) + def add_drive_raw(self, opts): + self._args.append('-drive') + self._args.append(opts) + return self + def add_drive(self, path, opts='', interface='virtio'): '''Add a virtio-blk drive to the VM''' options = ['if=%s' % interface, diff --git a/tests/qom-test.c b/tests/qom-test.c index fde04e7a19..3e5e8730e7 100644 --- a/tests/qom-test.c +++ b/tests/qom-test.c @@ -47,7 +47,7 @@ static bool is_blacklisted(const char *arch, const char *mach) static void test_properties(const char *path, bool recurse) { char *child_path; - QDict *response, *tuple; + QDict *response, *tuple, *tmp; QList *list; QListEntry *entry; @@ -57,6 +57,7 @@ static void test_properties(const char *path, bool recurse) g_assert(response); if (!recurse) { + QDECREF(response); return; } @@ -75,14 +76,16 @@ static void test_properties(const char *path, bool recurse) } else { const char *prop = qdict_get_str(tuple, "name"); g_test_message("Testing property %s.%s", path, prop); - response = qmp("{ 'execute': 'qom-get'," - " 'arguments': { 'path': %s," - " 'property': %s } }", - path, prop); + tmp = qmp("{ 'execute': 'qom-get'," + " 'arguments': { 'path': %s," + " 'property': %s } }", + path, prop); /* qom-get may fail but should not, e.g., segfault. */ - g_assert(response); + g_assert(tmp); + QDECREF(tmp); } } + QDECREF(response); } static void test_machine(gconstpointer data) @@ -98,9 +101,11 @@ static void test_machine(gconstpointer data) response = qmp("{ 'execute': 'quit' }"); g_assert(qdict_haskey(response, "return")); + QDECREF(response); qtest_end(); g_free(args); + g_free((void *)machine); } static void add_machine_test_cases(void) @@ -129,10 +134,12 @@ static void add_machine_test_cases(void) mname = qstring_get_str(qstr); if (!is_blacklisted(arch, mname)) { path = g_strdup_printf("qom/%s", mname); - qtest_add_data_func(path, mname, test_machine); + qtest_add_data_func(path, g_strdup(mname), test_machine); } } + qtest_end(); + QDECREF(response); } int main(int argc, char **argv) diff --git a/tests/test-aio.c b/tests/test-aio.c index 1623803e8c..e188d8c13d 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -393,6 +393,7 @@ static void test_aio_external_client(void) aio_enable_external(ctx); } assert(aio_poll(ctx, false)); + set_event_notifier(ctx, &data.e, NULL); event_notifier_cleanup(&data.e); } } diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c new file mode 100644 index 0000000000..34747e924d --- /dev/null +++ b/tests/test-blockjob-txn.c @@ -0,0 +1,250 @@ +/* + * Blockjob transactions tests + * + * Copyright Red Hat, Inc. 2015 + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include <glib.h> +#include "qapi/error.h" +#include "qemu/main-loop.h" +#include "block/blockjob.h" + +typedef struct { + BlockJob common; + unsigned int iterations; + bool use_timer; + int rc; + int *result; +} TestBlockJob; + +static const BlockJobDriver test_block_job_driver = { + .instance_size = sizeof(TestBlockJob), +}; + +static void test_block_job_complete(BlockJob *job, void *opaque) +{ + BlockDriverState *bs = job->bs; + int rc = (intptr_t)opaque; + + if (block_job_is_cancelled(job)) { + rc = -ECANCELED; + } + + block_job_completed(job, rc); + bdrv_unref(bs); +} + +static void coroutine_fn test_block_job_run(void *opaque) +{ + TestBlockJob *s = opaque; + BlockJob *job = &s->common; + + while (s->iterations--) { + if (s->use_timer) { + block_job_sleep_ns(job, QEMU_CLOCK_REALTIME, 0); + } else { + block_job_yield(job); + } + + if (block_job_is_cancelled(job)) { + break; + } + } + + block_job_defer_to_main_loop(job, test_block_job_complete, + (void *)(intptr_t)s->rc); +} + +typedef struct { + TestBlockJob *job; + int *result; +} TestBlockJobCBData; + +static void test_block_job_cb(void *opaque, int ret) +{ + TestBlockJobCBData *data = opaque; + if (!ret && block_job_is_cancelled(&data->job->common)) { + ret = -ECANCELED; + } + *data->result = ret; + g_free(data); +} + +/* Create a block job that completes with a given return code after a given + * number of event loop iterations. The return code is stored in the given + * result pointer. + * + * The event loop iterations can either be handled automatically with a 0 delay + * timer, or they can be stepped manually by entering the coroutine. + */ +static BlockJob *test_block_job_start(unsigned int iterations, + bool use_timer, + int rc, int *result) +{ + BlockDriverState *bs; + TestBlockJob *s; + TestBlockJobCBData *data; + + data = g_new0(TestBlockJobCBData, 1); + bs = bdrv_new(); + s = block_job_create(&test_block_job_driver, bs, 0, test_block_job_cb, + data, &error_abort); + s->iterations = iterations; + s->use_timer = use_timer; + s->rc = rc; + s->result = result; + s->common.co = qemu_coroutine_create(test_block_job_run); + data->job = s; + data->result = result; + qemu_coroutine_enter(s->common.co, s); + return &s->common; +} + +static void test_single_job(int expected) +{ + BlockJob *job; + BlockJobTxn *txn; + int result = -EINPROGRESS; + + txn = block_job_txn_new(); + job = test_block_job_start(1, true, expected, &result); + block_job_txn_add_job(txn, job); + + if (expected == -ECANCELED) { + block_job_cancel(job); + } + + while (result == -EINPROGRESS) { + aio_poll(qemu_get_aio_context(), true); + } + g_assert_cmpint(result, ==, expected); + + block_job_txn_unref(txn); +} + +static void test_single_job_success(void) +{ + test_single_job(0); +} + +static void test_single_job_failure(void) +{ + test_single_job(-EIO); +} + +static void test_single_job_cancel(void) +{ + test_single_job(-ECANCELED); +} + +static void test_pair_jobs(int expected1, int expected2) +{ + BlockJob *job1; + BlockJob *job2; + BlockJobTxn *txn; + int result1 = -EINPROGRESS; + int result2 = -EINPROGRESS; + + txn = block_job_txn_new(); + job1 = test_block_job_start(1, true, expected1, &result1); + block_job_txn_add_job(txn, job1); + job2 = test_block_job_start(2, true, expected2, &result2); + block_job_txn_add_job(txn, job2); + + if (expected1 == -ECANCELED) { + block_job_cancel(job1); + } + if (expected2 == -ECANCELED) { + block_job_cancel(job2); + } + + while (result1 == -EINPROGRESS || result2 == -EINPROGRESS) { + aio_poll(qemu_get_aio_context(), true); + } + + /* Failure or cancellation of one job cancels the other job */ + if (expected1 != 0) { + expected2 = -ECANCELED; + } else if (expected2 != 0) { + expected1 = -ECANCELED; + } + + g_assert_cmpint(result1, ==, expected1); + g_assert_cmpint(result2, ==, expected2); + + block_job_txn_unref(txn); +} + +static void test_pair_jobs_success(void) +{ + test_pair_jobs(0, 0); +} + +static void test_pair_jobs_failure(void) +{ + /* Test both orderings. The two jobs run for a different number of + * iterations so the code path is different depending on which job fails + * first. + */ + test_pair_jobs(-EIO, 0); + test_pair_jobs(0, -EIO); +} + +static void test_pair_jobs_cancel(void) +{ + test_pair_jobs(-ECANCELED, 0); + test_pair_jobs(0, -ECANCELED); +} + +static void test_pair_jobs_fail_cancel_race(void) +{ + BlockJob *job1; + BlockJob *job2; + BlockJobTxn *txn; + int result1 = -EINPROGRESS; + int result2 = -EINPROGRESS; + + txn = block_job_txn_new(); + job1 = test_block_job_start(1, true, -ECANCELED, &result1); + block_job_txn_add_job(txn, job1); + job2 = test_block_job_start(2, false, 0, &result2); + block_job_txn_add_job(txn, job2); + + block_job_cancel(job1); + + /* Now make job2 finish before the main loop kicks jobs. This simulates + * the race between a pending kick and another job completing. + */ + block_job_enter(job2); + block_job_enter(job2); + + while (result1 == -EINPROGRESS || result2 == -EINPROGRESS) { + aio_poll(qemu_get_aio_context(), true); + } + + g_assert_cmpint(result1, ==, -ECANCELED); + g_assert_cmpint(result2, ==, -ECANCELED); + + block_job_txn_unref(txn); +} + +int main(int argc, char **argv) +{ + qemu_init_main_loop(&error_abort); + + g_test_init(&argc, &argv, NULL); + g_test_add_func("/single/success", test_single_job_success); + g_test_add_func("/single/failure", test_single_job_failure); + g_test_add_func("/single/cancel", test_single_job_cancel); + g_test_add_func("/pair/success", test_pair_jobs_success); + g_test_add_func("/pair/failure", test_pair_jobs_failure); + g_test_add_func("/pair/cancel", test_pair_jobs_cancel); + g_test_add_func("/pair/fail-cancel-race", test_pair_jobs_fail_cancel_race); + return g_test_run(); +} diff --git a/tests/test-qga.c b/tests/test-qga.c index 64738465c5..e6a84d17f0 100644 --- a/tests/test-qga.c +++ b/tests/test-qga.c @@ -13,6 +13,7 @@ #include "libqtest.h" #include "config-host.h" +#include "qga/guest-agent-core.h" typedef struct { char *test_dir; @@ -352,10 +353,10 @@ static void test_qga_network_get_interfaces(gconstpointer fix) static void test_qga_file_ops(gconstpointer fix) { const TestFixture *fixture = fix; - const guchar helloworld[] = "Hello World!\n"; + const unsigned char helloworld[] = "Hello World!\n"; const char *b64; gchar *cmd, *path, *enc; - guchar *dec; + unsigned char *dec; QDict *ret, *val; int64_t id, eof; gsize count; @@ -457,7 +458,7 @@ static void test_qga_file_ops(gconstpointer fix) cmd = g_strdup_printf("{'execute': 'guest-file-seek'," " 'arguments': { 'handle': %" PRId64 ", " " 'offset': %d, 'whence': %d } }", - id, 6, SEEK_SET); + id, 6, QGA_SEEK_SET); ret = qmp_fd(fixture->fd, cmd); qmp_assert_no_error(ret); val = qdict_get_qdict(ret, "return"); @@ -496,6 +497,96 @@ static void test_qga_file_ops(gconstpointer fix) g_free(cmd); } +static void test_qga_file_write_read(gconstpointer fix) +{ + const TestFixture *fixture = fix; + const unsigned char helloworld[] = "Hello World!\n"; + const char *b64; + gchar *cmd, *enc; + QDict *ret, *val; + int64_t id, eof; + gsize count; + + /* open */ + ret = qmp_fd(fixture->fd, "{'execute': 'guest-file-open'," + " 'arguments': { 'path': 'foo', 'mode': 'w+' } }"); + g_assert_nonnull(ret); + qmp_assert_no_error(ret); + id = qdict_get_int(ret, "return"); + QDECREF(ret); + + enc = g_base64_encode(helloworld, sizeof(helloworld)); + /* write */ + cmd = g_strdup_printf("{'execute': 'guest-file-write'," + " 'arguments': { 'handle': %" PRId64 "," + " 'buf-b64': '%s' } }", id, enc); + ret = qmp_fd(fixture->fd, cmd); + g_assert_nonnull(ret); + qmp_assert_no_error(ret); + + val = qdict_get_qdict(ret, "return"); + count = qdict_get_int(val, "count"); + eof = qdict_get_bool(val, "eof"); + g_assert_cmpint(count, ==, sizeof(helloworld)); + g_assert_cmpint(eof, ==, 0); + QDECREF(ret); + g_free(cmd); + + /* read (check implicit flush) */ + cmd = g_strdup_printf("{'execute': 'guest-file-read'," + " 'arguments': { 'handle': %" PRId64 "} }", + id); + ret = qmp_fd(fixture->fd, cmd); + val = qdict_get_qdict(ret, "return"); + count = qdict_get_int(val, "count"); + eof = qdict_get_bool(val, "eof"); + b64 = qdict_get_str(val, "buf-b64"); + g_assert_cmpint(count, ==, 0); + g_assert(eof); + g_assert_cmpstr(b64, ==, ""); + QDECREF(ret); + g_free(cmd); + + /* seek to 0 */ + cmd = g_strdup_printf("{'execute': 'guest-file-seek'," + " 'arguments': { 'handle': %" PRId64 ", " + " 'offset': %d, 'whence': %d } }", + id, 0, QGA_SEEK_SET); + ret = qmp_fd(fixture->fd, cmd); + qmp_assert_no_error(ret); + val = qdict_get_qdict(ret, "return"); + count = qdict_get_int(val, "position"); + eof = qdict_get_bool(val, "eof"); + g_assert_cmpint(count, ==, 0); + g_assert(!eof); + QDECREF(ret); + g_free(cmd); + + /* read */ + cmd = g_strdup_printf("{'execute': 'guest-file-read'," + " 'arguments': { 'handle': %" PRId64 "} }", + id); + ret = qmp_fd(fixture->fd, cmd); + val = qdict_get_qdict(ret, "return"); + count = qdict_get_int(val, "count"); + eof = qdict_get_bool(val, "eof"); + b64 = qdict_get_str(val, "buf-b64"); + g_assert_cmpint(count, ==, sizeof(helloworld)); + g_assert(eof); + g_assert_cmpstr(b64, ==, enc); + QDECREF(ret); + g_free(cmd); + g_free(enc); + + /* close */ + cmd = g_strdup_printf("{'execute': 'guest-file-close'," + " 'arguments': {'handle': %" PRId64 "} }", + id); + ret = qmp_fd(fixture->fd, cmd); + QDECREF(ret); + g_free(cmd); +} + static void test_qga_get_time(gconstpointer fix) { const TestFixture *fixture = fix; @@ -762,6 +853,7 @@ int main(int argc, char **argv) g_test_add_data_func("/qga/get-memory-blocks", &fix, test_qga_get_memory_blocks); g_test_add_data_func("/qga/file-ops", &fix, test_qga_file_ops); + g_test_add_data_func("/qga/file-write-read", &fix, test_qga_file_write_read); g_test_add_data_func("/qga/get-time", &fix, test_qga_get_time); g_test_add_data_func("/qga/invalid-cmd", &fix, test_qga_invalid_cmd); g_test_add_data_func("/qga/fsfreeze-status", &fix, diff --git a/tests/test-qmp-commands.c b/tests/test-qmp-commands.c index bc59835835..888fb5ffec 100644 --- a/tests/test-qmp-commands.c +++ b/tests/test-qmp-commands.c @@ -25,11 +25,9 @@ UserDefTwo *qmp_user_def_cmd2(UserDefOne *ud1a, UserDefOne *ud1d = g_malloc0(sizeof(UserDefOne)); ud1c->string = strdup(ud1a->string); - ud1c->base = g_new0(UserDefZero, 1); - ud1c->base->integer = ud1a->base->integer; + ud1c->integer = ud1a->integer; ud1d->string = strdup(has_udb1 ? ud1b->string : "blah0"); - ud1d->base = g_new0(UserDefZero, 1); - ud1d->base->integer = has_udb1 ? ud1b->base->integer : 0; + ud1d->integer = has_udb1 ? ud1b->integer : 0; ret = g_new0(UserDefTwo, 1); ret->string0 = strdup("blah1"); @@ -64,8 +62,8 @@ __org_qemu_x_Union1 *qmp___org_qemu_x_command(__org_qemu_x_EnumList *a, { __org_qemu_x_Union1 *ret = g_new0(__org_qemu_x_Union1, 1); - ret->kind = ORG_QEMU_X_UNION1_KIND___ORG_QEMU_X_BRANCH; - ret->__org_qemu_x_branch = strdup("blah1"); + ret->type = ORG_QEMU_X_UNION1_KIND___ORG_QEMU_X_BRANCH; + ret->u.__org_qemu_x_branch = strdup("blah1"); return ret; } @@ -176,20 +174,17 @@ static void test_dealloc_types(void) UserDefOneList *ud1list; ud1test = g_malloc0(sizeof(UserDefOne)); - ud1test->base = g_new0(UserDefZero, 1); - ud1test->base->integer = 42; + ud1test->integer = 42; ud1test->string = g_strdup("hi there 42"); qapi_free_UserDefOne(ud1test); ud1a = g_malloc0(sizeof(UserDefOne)); - ud1a->base = g_new0(UserDefZero, 1); - ud1a->base->integer = 43; + ud1a->integer = 43; ud1a->string = g_strdup("hi there 43"); ud1b = g_malloc0(sizeof(UserDefOne)); - ud1b->base = g_new0(UserDefZero, 1); - ud1b->base->integer = 44; + ud1b->integer = 44; ud1b->string = g_strdup("hi there 44"); ud1list = g_malloc0(sizeof(UserDefOneList)); @@ -230,8 +225,7 @@ static void test_dealloc_partial(void) assert(ud2->dict1 == NULL); /* confirm & release construction error */ - assert(err != NULL); - error_free(err); + error_free_or_abort(&err); /* tear down partial object */ qapi_free_UserDefTwo(ud2); diff --git a/tests/test-qmp-event.c b/tests/test-qmp-event.c index 28f146d4b7..035c65cfdf 100644 --- a/tests/test-qmp-event.c +++ b/tests/test-qmp-event.c @@ -179,9 +179,7 @@ static void test_event_c(TestEventData *data, QDict *d, *d_data, *d_b; UserDefOne b; - UserDefZero z; - z.integer = 2; - b.base = &z; + b.integer = 2; b.string = g_strdup("test1"); b.has_enum1 = false; @@ -209,11 +207,9 @@ static void test_event_d(TestEventData *data, { UserDefOne struct1; EventStructOne a; - UserDefZero z; QDict *d, *d_data, *d_a, *d_struct1; - z.integer = 2; - struct1.base = &z; + struct1.integer = 2; struct1.string = g_strdup("test1"); struct1.has_enum1 = true; struct1.enum1 = ENUM_ONE_VALUE1; diff --git a/tests/test-qmp-input-strict.c b/tests/test-qmp-input-strict.c index 53a769388c..f1c2e3ba67 100644 --- a/tests/test-qmp-input-strict.c +++ b/tests/test-qmp-input-strict.c @@ -40,31 +40,42 @@ static void validate_teardown(TestInputVisitorData *data, } } -/* This is provided instead of a test setup function so that the JSON - string used by the tests are kept in the test functions (and not - int main()) */ -static GCC_FMT_ATTR(2, 3) -Visitor *validate_test_init(TestInputVisitorData *data, - const char *json_string, ...) +/* The various test_init functions are provided instead of a test setup + function so that the JSON string used by the tests are kept in the test + functions (and not in main()). */ +static Visitor *validate_test_init_internal(TestInputVisitorData *data, + const char *json_string, + va_list *ap) { Visitor *v; - va_list ap; - va_start(ap, json_string); - data->obj = qobject_from_jsonv(json_string, &ap); - va_end(ap); + validate_teardown(data, NULL); - g_assert(data->obj != NULL); + data->obj = qobject_from_jsonv(json_string, ap); + g_assert(data->obj); data->qiv = qmp_input_visitor_new_strict(data->obj); - g_assert(data->qiv != NULL); + g_assert(data->qiv); v = qmp_input_get_visitor(data->qiv); - g_assert(v != NULL); + g_assert(v); return v; } +static GCC_FMT_ATTR(2, 3) +Visitor *validate_test_init(TestInputVisitorData *data, + const char *json_string, ...) +{ + Visitor *v; + va_list ap; + + va_start(ap, json_string); + v = validate_test_init_internal(data, json_string, &ap); + va_end(ap); + return v; +} + /* similar to validate_test_init(), but does not expect a string * literal/format json_string argument and so can be used for * programatically generated strings (and we can't pass in programatically @@ -75,67 +86,19 @@ Visitor *validate_test_init(TestInputVisitorData *data, static Visitor *validate_test_init_raw(TestInputVisitorData *data, const char *json_string) { - Visitor *v; - - data->obj = qobject_from_json(json_string); - g_assert(data->obj != NULL); - - data->qiv = qmp_input_visitor_new_strict(data->obj); - g_assert(data->qiv != NULL); - - v = qmp_input_get_visitor(data->qiv); - g_assert(v != NULL); - - return v; + return validate_test_init_internal(data, json_string, NULL); } -typedef struct TestStruct -{ - int64_t integer; - bool boolean; - char *string; -} TestStruct; - -static void visit_type_TestStruct(Visitor *v, TestStruct **obj, - const char *name, Error **errp) -{ - Error *err = NULL; - - visit_start_struct(v, (void **)obj, "TestStruct", name, sizeof(TestStruct), - &err); - if (err) { - goto out; - } - - visit_type_int(v, &(*obj)->integer, "integer", &err); - if (err) { - goto out_end; - } - visit_type_bool(v, &(*obj)->boolean, "boolean", &err); - if (err) { - goto out_end; - } - visit_type_str(v, &(*obj)->string, "string", &err); - -out_end: - error_propagate(errp, err); - err = NULL; - visit_end_struct(v, &err); -out: - error_propagate(errp, err); -} static void test_validate_struct(TestInputVisitorData *data, const void *unused) { TestStruct *p = NULL; - Error *err = NULL; Visitor *v; v = validate_test_init(data, "{ 'integer': -42, 'boolean': true, 'string': 'foo' }"); - visit_type_TestStruct(v, &p, NULL, &err); - g_assert(!err); + visit_type_TestStruct(v, &p, NULL, &error_abort); g_free(p->string); g_free(p); } @@ -144,7 +107,6 @@ static void test_validate_struct_nested(TestInputVisitorData *data, const void *unused) { UserDefTwo *udp = NULL; - Error *err = NULL; Visitor *v; v = validate_test_init(data, "{ 'string0': 'string0', " @@ -152,8 +114,7 @@ static void test_validate_struct_nested(TestInputVisitorData *data, "'dict2': { 'userdef': { 'integer': 42, " "'string': 'string' }, 'string': 'string2'}}}"); - visit_type_UserDefTwo(v, &udp, NULL, &err); - g_assert(!err); + visit_type_UserDefTwo(v, &udp, NULL, &error_abort); qapi_free_UserDefTwo(udp); } @@ -161,13 +122,11 @@ static void test_validate_list(TestInputVisitorData *data, const void *unused) { UserDefOneList *head = NULL; - Error *err = NULL; Visitor *v; v = validate_test_init(data, "[ { 'string': 'string0', 'integer': 42 }, { 'string': 'string1', 'integer': 43 }, { 'string': 'string2', 'integer': 44 } ]"); - visit_type_UserDefOneList(v, &head, NULL, &err); - g_assert(!err); + visit_type_UserDefOneList(v, &head, NULL, &error_abort); qapi_free_UserDefOneList(head); } @@ -176,12 +135,10 @@ static void test_validate_union_native_list(TestInputVisitorData *data, { UserDefNativeListUnion *tmp = NULL; Visitor *v; - Error *err = NULL; v = validate_test_init(data, "{ 'type': 'integer', 'data' : [ 1, 2 ] }"); - visit_type_UserDefNativeListUnion(v, &tmp, NULL, &err); - g_assert(!err); + visit_type_UserDefNativeListUnion(v, &tmp, NULL, &error_abort); qapi_free_UserDefNativeListUnion(tmp); } @@ -190,7 +147,6 @@ static void test_validate_union_flat(TestInputVisitorData *data, { UserDefFlatUnion *tmp = NULL; Visitor *v; - Error *err = NULL; v = validate_test_init(data, "{ 'enum1': 'value1', " @@ -198,8 +154,7 @@ static void test_validate_union_flat(TestInputVisitorData *data, "'string': 'str', " "'boolean': true }"); - visit_type_UserDefFlatUnion(v, &tmp, NULL, &err); - g_assert(!err); + visit_type_UserDefFlatUnion(v, &tmp, NULL, &error_abort); qapi_free_UserDefFlatUnion(tmp); } @@ -208,12 +163,10 @@ static void test_validate_alternate(TestInputVisitorData *data, { UserDefAlternate *tmp = NULL; Visitor *v; - Error *err = NULL; v = validate_test_init(data, "42"); - visit_type_UserDefAlternate(v, &tmp, NULL, &err); - g_assert(!err); + visit_type_UserDefAlternate(v, &tmp, NULL, &error_abort); qapi_free_UserDefAlternate(tmp); } @@ -227,7 +180,7 @@ static void test_validate_fail_struct(TestInputVisitorData *data, v = validate_test_init(data, "{ 'integer': -42, 'boolean': true, 'string': 'foo', 'extra': 42 }"); visit_type_TestStruct(v, &p, NULL, &err); - g_assert(err); + error_free_or_abort(&err); if (p) { g_free(p->string); } @@ -244,7 +197,7 @@ static void test_validate_fail_struct_nested(TestInputVisitorData *data, v = validate_test_init(data, "{ 'string0': 'string0', 'dict1': { 'string1': 'string1', 'dict2': { 'userdef1': { 'integer': 42, 'string': 'string', 'extra': [42, 23, {'foo':'bar'}] }, 'string2': 'string2'}}}"); visit_type_UserDefTwo(v, &udp, NULL, &err); - g_assert(err); + error_free_or_abort(&err); qapi_free_UserDefTwo(udp); } @@ -258,7 +211,7 @@ static void test_validate_fail_list(TestInputVisitorData *data, v = validate_test_init(data, "[ { 'string': 'string0', 'integer': 42 }, { 'string': 'string1', 'integer': 43 }, { 'string': 'string2', 'integer': 44, 'extra': 'ggg' } ]"); visit_type_UserDefOneList(v, &head, NULL, &err); - g_assert(err); + error_free_or_abort(&err); qapi_free_UserDefOneList(head); } @@ -273,7 +226,7 @@ static void test_validate_fail_union_native_list(TestInputVisitorData *data, "{ 'type': 'integer', 'data' : [ 'string' ] }"); visit_type_UserDefNativeListUnion(v, &tmp, NULL, &err); - g_assert(err); + error_free_or_abort(&err); qapi_free_UserDefNativeListUnion(tmp); } @@ -287,7 +240,7 @@ static void test_validate_fail_union_flat(TestInputVisitorData *data, v = validate_test_init(data, "{ 'string': 'c', 'integer': 41, 'boolean': true }"); visit_type_UserDefFlatUnion(v, &tmp, NULL, &err); - g_assert(err); + error_free_or_abort(&err); qapi_free_UserDefFlatUnion(tmp); } @@ -302,7 +255,7 @@ static void test_validate_fail_union_flat_no_discrim(TestInputVisitorData *data, v = validate_test_init(data, "{ 'integer': 42, 'string': 'c', 'string1': 'd', 'string2': 'e' }"); visit_type_UserDefFlatUnion2(v, &tmp, NULL, &err); - g_assert(err); + error_free_or_abort(&err); qapi_free_UserDefFlatUnion2(tmp); } @@ -316,7 +269,7 @@ static void test_validate_fail_alternate(TestInputVisitorData *data, v = validate_test_init(data, "3.14"); visit_type_UserDefAlternate(v, &tmp, NULL, &err); - g_assert(err); + error_free_or_abort(&err); qapi_free_UserDefAlternate(tmp); } @@ -324,16 +277,11 @@ static void do_test_validate_qmp_introspect(TestInputVisitorData *data, const char *schema_json) { SchemaInfoList *schema = NULL; - Error *err = NULL; Visitor *v; v = validate_test_init_raw(data, schema_json); - visit_type_SchemaInfoList(v, &schema, NULL, &err); - if (err) { - fprintf(stderr, "%s", error_get_pretty(err)); - } - g_assert(!err); + visit_type_SchemaInfoList(v, &schema, NULL, &error_abort); g_assert(schema); qapi_free_SchemaInfoList(schema); diff --git a/tests/test-qmp-input-visitor.c b/tests/test-qmp-input-visitor.c index 8941963c8d..d48ebdd62f 100644 --- a/tests/test-qmp-input-visitor.c +++ b/tests/test-qmp-input-visitor.c @@ -36,31 +36,42 @@ static void visitor_input_teardown(TestInputVisitorData *data, } } -/* This is provided instead of a test setup function so that the JSON - string used by the tests are kept in the test functions (and not - int main()) */ -static GCC_FMT_ATTR(2, 3) -Visitor *visitor_input_test_init(TestInputVisitorData *data, - const char *json_string, ...) +/* The various test_init functions are provided instead of a test setup + function so that the JSON string used by the tests are kept in the test + functions (and not in main()). */ +static Visitor *visitor_input_test_init_internal(TestInputVisitorData *data, + const char *json_string, + va_list *ap) { Visitor *v; - va_list ap; - va_start(ap, json_string); - data->obj = qobject_from_jsonv(json_string, &ap); - va_end(ap); + visitor_input_teardown(data, NULL); - g_assert(data->obj != NULL); + data->obj = qobject_from_jsonv(json_string, ap); + g_assert(data->obj); data->qiv = qmp_input_visitor_new(data->obj); - g_assert(data->qiv != NULL); + g_assert(data->qiv); v = qmp_input_get_visitor(data->qiv); - g_assert(v != NULL); + g_assert(v); return v; } +static GCC_FMT_ATTR(2, 3) +Visitor *visitor_input_test_init(TestInputVisitorData *data, + const char *json_string, ...) +{ + Visitor *v; + va_list ap; + + va_start(ap, json_string); + v = visitor_input_test_init_internal(data, json_string, &ap); + va_end(ap); + return v; +} + /* similar to visitor_input_test_init(), but does not expect a string * literal/format json_string argument and so can be used for * programatically generated strings (and we can't pass in programatically @@ -71,32 +82,18 @@ Visitor *visitor_input_test_init(TestInputVisitorData *data, static Visitor *visitor_input_test_init_raw(TestInputVisitorData *data, const char *json_string) { - Visitor *v; - - data->obj = qobject_from_json(json_string); - - g_assert(data->obj != NULL); - - data->qiv = qmp_input_visitor_new(data->obj); - g_assert(data->qiv != NULL); - - v = qmp_input_get_visitor(data->qiv); - g_assert(v != NULL); - - return v; + return visitor_input_test_init_internal(data, json_string, NULL); } static void test_visitor_in_int(TestInputVisitorData *data, const void *unused) { int64_t res = 0, value = -42; - Error *err = NULL; Visitor *v; v = visitor_input_test_init(data, "%" PRId64, value); - visit_type_int(v, &res, NULL, &err); - g_assert(!err); + visit_type_int(v, &res, NULL, &error_abort); g_assert_cmpint(res, ==, value); } @@ -114,21 +111,18 @@ static void test_visitor_in_int_overflow(TestInputVisitorData *data, v = visitor_input_test_init(data, "%f", DBL_MAX); visit_type_int(v, &res, NULL, &err); - g_assert(err); - error_free(err); + error_free_or_abort(&err); } static void test_visitor_in_bool(TestInputVisitorData *data, const void *unused) { - Error *err = NULL; bool res = false; Visitor *v; v = visitor_input_test_init(data, "true"); - visit_type_bool(v, &res, NULL, &err); - g_assert(!err); + visit_type_bool(v, &res, NULL, &error_abort); g_assert_cmpint(res, ==, true); } @@ -136,13 +130,11 @@ static void test_visitor_in_number(TestInputVisitorData *data, const void *unused) { double res = 0, value = 3.14; - Error *err = NULL; Visitor *v; v = visitor_input_test_init(data, "%f", value); - visit_type_number(v, &res, NULL, &err); - g_assert(!err); + visit_type_number(v, &res, NULL, &error_abort); g_assert_cmpfloat(res, ==, value); } @@ -150,13 +142,11 @@ static void test_visitor_in_string(TestInputVisitorData *data, const void *unused) { char *res = NULL, *value = (char *) "Q E M U"; - Error *err = NULL; Visitor *v; v = visitor_input_test_init(data, "%s", value); - visit_type_str(v, &res, NULL, &err); - g_assert(!err); + visit_type_str(v, &res, NULL, &error_abort); g_assert_cmpstr(res, ==, value); g_free(res); @@ -165,7 +155,6 @@ static void test_visitor_in_string(TestInputVisitorData *data, static void test_visitor_in_enum(TestInputVisitorData *data, const void *unused) { - Error *err = NULL; Visitor *v; EnumOne i; @@ -174,63 +163,21 @@ static void test_visitor_in_enum(TestInputVisitorData *data, v = visitor_input_test_init(data, "%s", EnumOne_lookup[i]); - visit_type_EnumOne(v, &res, NULL, &err); - g_assert(!err); + visit_type_EnumOne(v, &res, NULL, &error_abort); g_assert_cmpint(i, ==, res); - - visitor_input_teardown(data, NULL); } - - data->obj = NULL; - data->qiv = NULL; } -typedef struct TestStruct -{ - int64_t integer; - bool boolean; - char *string; -} TestStruct; - -static void visit_type_TestStruct(Visitor *v, TestStruct **obj, - const char *name, Error **errp) -{ - Error *err = NULL; - - visit_start_struct(v, (void **)obj, "TestStruct", name, sizeof(TestStruct), - &err); - if (err) { - goto out; - } - visit_type_int(v, &(*obj)->integer, "integer", &err); - if (err) { - goto out_end; - } - visit_type_bool(v, &(*obj)->boolean, "boolean", &err); - if (err) { - goto out_end; - } - visit_type_str(v, &(*obj)->string, "string", &err); - -out_end: - error_propagate(errp, err); - err = NULL; - visit_end_struct(v, &err); -out: - error_propagate(errp, err); -} static void test_visitor_in_struct(TestInputVisitorData *data, const void *unused) { TestStruct *p = NULL; - Error *err = NULL; Visitor *v; v = visitor_input_test_init(data, "{ 'integer': -42, 'boolean': true, 'string': 'foo' }"); - visit_type_TestStruct(v, &p, NULL, &err); - g_assert(!err); + visit_type_TestStruct(v, &p, NULL, &error_abort); g_assert_cmpint(p->integer, ==, -42); g_assert(p->boolean == true); g_assert_cmpstr(p->string, ==, "foo"); @@ -239,17 +186,10 @@ static void test_visitor_in_struct(TestInputVisitorData *data, g_free(p); } -static void check_and_free_str(char *str, const char *cmp) -{ - g_assert_cmpstr(str, ==, cmp); - g_free(str); -} - static void test_visitor_in_struct_nested(TestInputVisitorData *data, const void *unused) { UserDefTwo *udp = NULL; - Error *err = NULL; Visitor *v; v = visitor_input_test_init(data, "{ 'string0': 'string0', " @@ -257,34 +197,28 @@ static void test_visitor_in_struct_nested(TestInputVisitorData *data, "'dict2': { 'userdef': { 'integer': 42, " "'string': 'string' }, 'string': 'string2'}}}"); - visit_type_UserDefTwo(v, &udp, NULL, &err); - g_assert(!err); + visit_type_UserDefTwo(v, &udp, NULL, &error_abort); - check_and_free_str(udp->string0, "string0"); - check_and_free_str(udp->dict1->string1, "string1"); - g_assert_cmpint(udp->dict1->dict2->userdef->base->integer, ==, 42); - check_and_free_str(udp->dict1->dict2->userdef->string, "string"); - check_and_free_str(udp->dict1->dict2->string, "string2"); + g_assert_cmpstr(udp->string0, ==, "string0"); + g_assert_cmpstr(udp->dict1->string1, ==, "string1"); + g_assert_cmpint(udp->dict1->dict2->userdef->integer, ==, 42); + g_assert_cmpstr(udp->dict1->dict2->userdef->string, ==, "string"); + g_assert_cmpstr(udp->dict1->dict2->string, ==, "string2"); g_assert(udp->dict1->has_dict3 == false); - g_free(udp->dict1->dict2->userdef); - g_free(udp->dict1->dict2); - g_free(udp->dict1); - g_free(udp); + qapi_free_UserDefTwo(udp); } static void test_visitor_in_list(TestInputVisitorData *data, const void *unused) { UserDefOneList *item, *head = NULL; - Error *err = NULL; Visitor *v; int i; v = visitor_input_test_init(data, "[ { 'string': 'string0', 'integer': 42 }, { 'string': 'string1', 'integer': 43 }, { 'string': 'string2', 'integer': 44 } ]"); - visit_type_UserDefOneList(v, &head, NULL, &err); - g_assert(!err); + visit_type_UserDefOneList(v, &head, NULL, &error_abort); g_assert(head != NULL); for (i = 0, item = head; item; item = item->next, i++) { @@ -292,17 +226,22 @@ static void test_visitor_in_list(TestInputVisitorData *data, snprintf(string, sizeof(string), "string%d", i); g_assert_cmpstr(item->value->string, ==, string); - g_assert_cmpint(item->value->base->integer, ==, 42 + i); + g_assert_cmpint(item->value->integer, ==, 42 + i); } qapi_free_UserDefOneList(head); + head = NULL; + + /* An empty list is valid */ + v = visitor_input_test_init(data, "[]"); + visit_type_UserDefOneList(v, &head, NULL, &error_abort); + g_assert(!head); } static void test_visitor_in_any(TestInputVisitorData *data, const void *unused) { QObject *res = NULL; - Error *err = NULL; Visitor *v; QInt *qint; QBool *qbool; @@ -311,16 +250,14 @@ static void test_visitor_in_any(TestInputVisitorData *data, QObject *qobj; v = visitor_input_test_init(data, "-42"); - visit_type_any(v, &res, NULL, &err); - g_assert(!err); + visit_type_any(v, &res, NULL, &error_abort); qint = qobject_to_qint(res); g_assert(qint); g_assert_cmpint(qint_get_int(qint), ==, -42); qobject_decref(res); v = visitor_input_test_init(data, "{ 'integer': -42, 'boolean': true, 'string': 'foo' }"); - visit_type_any(v, &res, NULL, &err); - g_assert(!err); + visit_type_any(v, &res, NULL, &error_abort); qdict = qobject_to_qdict(res); g_assert(qdict && qdict_size(qdict) == 3); qobj = qdict_get(qdict, "integer"); @@ -345,8 +282,8 @@ static void test_visitor_in_union_flat(TestInputVisitorData *data, const void *unused) { Visitor *v; - Error *err = NULL; UserDefFlatUnion *tmp; + UserDefUnionBase *base; v = visitor_input_test_init(data, "{ 'enum1': 'value1', " @@ -354,12 +291,15 @@ static void test_visitor_in_union_flat(TestInputVisitorData *data, "'string': 'str', " "'boolean': true }"); - visit_type_UserDefFlatUnion(v, &tmp, NULL, &err); - g_assert(err == NULL); + visit_type_UserDefFlatUnion(v, &tmp, NULL, &error_abort); g_assert_cmpint(tmp->enum1, ==, ENUM_ONE_VALUE1); g_assert_cmpstr(tmp->string, ==, "str"); g_assert_cmpint(tmp->integer, ==, 41); - g_assert_cmpint(tmp->value1->boolean, ==, true); + g_assert_cmpint(tmp->u.value1->boolean, ==, true); + + base = qapi_UserDefFlatUnion_base(tmp); + g_assert(&base->enum1 == &tmp->enum1); + qapi_free_UserDefFlatUnion(tmp); } @@ -372,25 +312,20 @@ static void test_visitor_in_alternate(TestInputVisitorData *data, v = visitor_input_test_init(data, "42"); visit_type_UserDefAlternate(v, &tmp, NULL, &error_abort); - g_assert_cmpint(tmp->kind, ==, USER_DEF_ALTERNATE_KIND_I); - g_assert_cmpint(tmp->i, ==, 42); + g_assert_cmpint(tmp->type, ==, USER_DEF_ALTERNATE_KIND_I); + g_assert_cmpint(tmp->u.i, ==, 42); qapi_free_UserDefAlternate(tmp); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "'string'"); visit_type_UserDefAlternate(v, &tmp, NULL, &error_abort); - g_assert_cmpint(tmp->kind, ==, USER_DEF_ALTERNATE_KIND_S); - g_assert_cmpstr(tmp->s, ==, "string"); + g_assert_cmpint(tmp->type, ==, USER_DEF_ALTERNATE_KIND_S); + g_assert_cmpstr(tmp->u.s, ==, "string"); qapi_free_UserDefAlternate(tmp); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "false"); visit_type_UserDefAlternate(v, &tmp, NULL, &err); - g_assert(err); - error_free(err); - err = NULL; + error_free_or_abort(&err); qapi_free_UserDefAlternate(tmp); - visitor_input_teardown(data, NULL); } static void test_visitor_in_alternate_number(TestInputVisitorData *data, @@ -409,97 +344,77 @@ static void test_visitor_in_alternate_number(TestInputVisitorData *data, v = visitor_input_test_init(data, "42"); visit_type_AltStrBool(v, &asb, NULL, &err); - g_assert(err); - error_free(err); - err = NULL; + error_free_or_abort(&err); qapi_free_AltStrBool(asb); - visitor_input_teardown(data, NULL); /* FIXME: Order of alternate should not affect semantics; asn should * parse the same as ans */ v = visitor_input_test_init(data, "42"); visit_type_AltStrNum(v, &asn, NULL, &err); - /* FIXME g_assert_cmpint(asn->kind, == ALT_STR_NUM_KIND_N); */ - /* FIXME g_assert_cmpfloat(asn->n, ==, 42); */ - g_assert(err); - error_free(err); - err = NULL; + /* FIXME g_assert_cmpint(asn->type, == ALT_STR_NUM_KIND_N); */ + /* FIXME g_assert_cmpfloat(asn->u.n, ==, 42); */ + error_free_or_abort(&err); qapi_free_AltStrNum(asn); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42"); visit_type_AltNumStr(v, &ans, NULL, &error_abort); - g_assert_cmpint(ans->kind, ==, ALT_NUM_STR_KIND_N); - g_assert_cmpfloat(ans->n, ==, 42); + g_assert_cmpint(ans->type, ==, ALT_NUM_STR_KIND_N); + g_assert_cmpfloat(ans->u.n, ==, 42); qapi_free_AltNumStr(ans); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42"); visit_type_AltStrInt(v, &asi, NULL, &error_abort); - g_assert_cmpint(asi->kind, ==, ALT_STR_INT_KIND_I); - g_assert_cmpint(asi->i, ==, 42); + g_assert_cmpint(asi->type, ==, ALT_STR_INT_KIND_I); + g_assert_cmpint(asi->u.i, ==, 42); qapi_free_AltStrInt(asi); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42"); visit_type_AltIntNum(v, &ain, NULL, &error_abort); - g_assert_cmpint(ain->kind, ==, ALT_INT_NUM_KIND_I); - g_assert_cmpint(ain->i, ==, 42); + g_assert_cmpint(ain->type, ==, ALT_INT_NUM_KIND_I); + g_assert_cmpint(ain->u.i, ==, 42); qapi_free_AltIntNum(ain); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42"); visit_type_AltNumInt(v, &ani, NULL, &error_abort); - g_assert_cmpint(ani->kind, ==, ALT_NUM_INT_KIND_I); - g_assert_cmpint(ani->i, ==, 42); + g_assert_cmpint(ani->type, ==, ALT_NUM_INT_KIND_I); + g_assert_cmpint(ani->u.i, ==, 42); qapi_free_AltNumInt(ani); - visitor_input_teardown(data, NULL); /* Parsing a double */ v = visitor_input_test_init(data, "42.5"); visit_type_AltStrBool(v, &asb, NULL, &err); - g_assert(err); - error_free(err); - err = NULL; + error_free_or_abort(&err); qapi_free_AltStrBool(asb); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42.5"); visit_type_AltStrNum(v, &asn, NULL, &error_abort); - g_assert_cmpint(asn->kind, ==, ALT_STR_NUM_KIND_N); - g_assert_cmpfloat(asn->n, ==, 42.5); + g_assert_cmpint(asn->type, ==, ALT_STR_NUM_KIND_N); + g_assert_cmpfloat(asn->u.n, ==, 42.5); qapi_free_AltStrNum(asn); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42.5"); visit_type_AltNumStr(v, &ans, NULL, &error_abort); - g_assert_cmpint(ans->kind, ==, ALT_NUM_STR_KIND_N); - g_assert_cmpfloat(ans->n, ==, 42.5); + g_assert_cmpint(ans->type, ==, ALT_NUM_STR_KIND_N); + g_assert_cmpfloat(ans->u.n, ==, 42.5); qapi_free_AltNumStr(ans); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42.5"); visit_type_AltStrInt(v, &asi, NULL, &err); - g_assert(err); - error_free(err); - err = NULL; + error_free_or_abort(&err); qapi_free_AltStrInt(asi); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42.5"); visit_type_AltIntNum(v, &ain, NULL, &error_abort); - g_assert_cmpint(ain->kind, ==, ALT_INT_NUM_KIND_N); - g_assert_cmpfloat(ain->n, ==, 42.5); + g_assert_cmpint(ain->type, ==, ALT_INT_NUM_KIND_N); + g_assert_cmpfloat(ain->u.n, ==, 42.5); qapi_free_AltIntNum(ain); - visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, "42.5"); visit_type_AltNumInt(v, &ani, NULL, &error_abort); - g_assert_cmpint(ani->kind, ==, ALT_NUM_INT_KIND_N); - g_assert_cmpfloat(ani->n, ==, 42.5); + g_assert_cmpint(ani->type, ==, ALT_NUM_INT_KIND_N); + g_assert_cmpfloat(ani->u.n, ==, 42.5); qapi_free_AltNumInt(ani); - visitor_input_teardown(data, NULL); } static void test_native_list_integer_helper(TestInputVisitorData *data, @@ -507,7 +422,6 @@ static void test_native_list_integer_helper(TestInputVisitorData *data, UserDefNativeListUnionKind kind) { UserDefNativeListUnion *cvalue = NULL; - Error *err = NULL; Visitor *v; GString *gstr_list = g_string_new(""); GString *gstr_union = g_string_new(""); @@ -524,71 +438,70 @@ static void test_native_list_integer_helper(TestInputVisitorData *data, gstr_list->str); v = visitor_input_test_init_raw(data, gstr_union->str); - visit_type_UserDefNativeListUnion(v, &cvalue, NULL, &err); - g_assert(err == NULL); + visit_type_UserDefNativeListUnion(v, &cvalue, NULL, &error_abort); g_assert(cvalue != NULL); - g_assert_cmpint(cvalue->kind, ==, kind); + g_assert_cmpint(cvalue->type, ==, kind); switch (kind) { case USER_DEF_NATIVE_LIST_UNION_KIND_INTEGER: { intList *elem = NULL; - for (i = 0, elem = cvalue->integer; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.integer; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; } case USER_DEF_NATIVE_LIST_UNION_KIND_S8: { int8List *elem = NULL; - for (i = 0, elem = cvalue->s8; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.s8; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; } case USER_DEF_NATIVE_LIST_UNION_KIND_S16: { int16List *elem = NULL; - for (i = 0, elem = cvalue->s16; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.s16; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; } case USER_DEF_NATIVE_LIST_UNION_KIND_S32: { int32List *elem = NULL; - for (i = 0, elem = cvalue->s32; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.s32; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; } case USER_DEF_NATIVE_LIST_UNION_KIND_S64: { int64List *elem = NULL; - for (i = 0, elem = cvalue->s64; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.s64; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; } case USER_DEF_NATIVE_LIST_UNION_KIND_U8: { uint8List *elem = NULL; - for (i = 0, elem = cvalue->u8; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.u8; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; } case USER_DEF_NATIVE_LIST_UNION_KIND_U16: { uint16List *elem = NULL; - for (i = 0, elem = cvalue->u16; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.u16; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; } case USER_DEF_NATIVE_LIST_UNION_KIND_U32: { uint32List *elem = NULL; - for (i = 0, elem = cvalue->u32; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.u32; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; } case USER_DEF_NATIVE_LIST_UNION_KIND_U64: { uint64List *elem = NULL; - for (i = 0, elem = cvalue->u64; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.u64; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, i); } break; @@ -670,7 +583,6 @@ static void test_visitor_in_native_list_bool(TestInputVisitorData *data, { UserDefNativeListUnion *cvalue = NULL; boolList *elem = NULL; - Error *err = NULL; Visitor *v; GString *gstr_list = g_string_new(""); GString *gstr_union = g_string_new(""); @@ -687,12 +599,11 @@ static void test_visitor_in_native_list_bool(TestInputVisitorData *data, gstr_list->str); v = visitor_input_test_init_raw(data, gstr_union->str); - visit_type_UserDefNativeListUnion(v, &cvalue, NULL, &err); - g_assert(err == NULL); + visit_type_UserDefNativeListUnion(v, &cvalue, NULL, &error_abort); g_assert(cvalue != NULL); - g_assert_cmpint(cvalue->kind, ==, USER_DEF_NATIVE_LIST_UNION_KIND_BOOLEAN); + g_assert_cmpint(cvalue->type, ==, USER_DEF_NATIVE_LIST_UNION_KIND_BOOLEAN); - for (i = 0, elem = cvalue->boolean; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.boolean; elem; elem = elem->next, i++) { g_assert_cmpint(elem->value, ==, (i % 3 == 0) ? 1 : 0); } @@ -706,7 +617,6 @@ static void test_visitor_in_native_list_string(TestInputVisitorData *data, { UserDefNativeListUnion *cvalue = NULL; strList *elem = NULL; - Error *err = NULL; Visitor *v; GString *gstr_list = g_string_new(""); GString *gstr_union = g_string_new(""); @@ -722,12 +632,11 @@ static void test_visitor_in_native_list_string(TestInputVisitorData *data, gstr_list->str); v = visitor_input_test_init_raw(data, gstr_union->str); - visit_type_UserDefNativeListUnion(v, &cvalue, NULL, &err); - g_assert(err == NULL); + visit_type_UserDefNativeListUnion(v, &cvalue, NULL, &error_abort); g_assert(cvalue != NULL); - g_assert_cmpint(cvalue->kind, ==, USER_DEF_NATIVE_LIST_UNION_KIND_STRING); + g_assert_cmpint(cvalue->type, ==, USER_DEF_NATIVE_LIST_UNION_KIND_STRING); - for (i = 0, elem = cvalue->string; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.string; elem; elem = elem->next, i++) { gchar str[8]; sprintf(str, "%d", i); g_assert_cmpstr(elem->value, ==, str); @@ -745,7 +654,6 @@ static void test_visitor_in_native_list_number(TestInputVisitorData *data, { UserDefNativeListUnion *cvalue = NULL; numberList *elem = NULL; - Error *err = NULL; Visitor *v; GString *gstr_list = g_string_new(""); GString *gstr_union = g_string_new(""); @@ -761,12 +669,11 @@ static void test_visitor_in_native_list_number(TestInputVisitorData *data, gstr_list->str); v = visitor_input_test_init_raw(data, gstr_union->str); - visit_type_UserDefNativeListUnion(v, &cvalue, NULL, &err); - g_assert(err == NULL); + visit_type_UserDefNativeListUnion(v, &cvalue, NULL, &error_abort); g_assert(cvalue != NULL); - g_assert_cmpint(cvalue->kind, ==, USER_DEF_NATIVE_LIST_UNION_KIND_NUMBER); + g_assert_cmpint(cvalue->type, ==, USER_DEF_NATIVE_LIST_UNION_KIND_NUMBER); - for (i = 0, elem = cvalue->number; elem; elem = elem->next, i++) { + for (i = 0, elem = cvalue->u.number; elem; elem = elem->next, i++) { GString *double_expected = g_string_new(""); GString *double_actual = g_string_new(""); @@ -797,18 +704,69 @@ static void test_visitor_in_errors(TestInputVisitorData *data, TestStruct *p = NULL; Error *err = NULL; Visitor *v; + strList *q = NULL; - v = visitor_input_test_init(data, "{ 'integer': false, 'boolean': 'foo', 'string': -42 }"); + v = visitor_input_test_init(data, "{ 'integer': false, 'boolean': 'foo', " + "'string': -42 }"); visit_type_TestStruct(v, &p, NULL, &err); - g_assert(err); + error_free_or_abort(&err); /* FIXME - a failed parse should not leave a partially-allocated p * for us to clean up; this could cause callers to leak memory. */ g_assert(p->string == NULL); - error_free(err); g_free(p->string); g_free(p); + + v = visitor_input_test_init(data, "[ '1', '2', false, '3' ]"); + visit_type_strList(v, &q, NULL, &err); + error_free_or_abort(&err); + assert(q); + qapi_free_strList(q); +} + +static void test_visitor_in_wrong_type(TestInputVisitorData *data, + const void *unused) +{ + TestStruct *p = NULL; + Visitor *v; + strList *q = NULL; + int64_t i; + Error *err = NULL; + + /* Make sure arrays and structs cannot be confused */ + + v = visitor_input_test_init(data, "[]"); + visit_type_TestStruct(v, &p, NULL, &err); + error_free_or_abort(&err); + g_assert(!p); + + v = visitor_input_test_init(data, "{}"); + visit_type_strList(v, &q, NULL, &err); + error_free_or_abort(&err); + assert(!q); + + /* Make sure primitives and struct cannot be confused */ + + v = visitor_input_test_init(data, "1"); + visit_type_TestStruct(v, &p, NULL, &err); + error_free_or_abort(&err); + g_assert(!p); + + v = visitor_input_test_init(data, "{}"); + visit_type_int(v, &i, NULL, &err); + error_free_or_abort(&err); + + /* Make sure primitives and arrays cannot be confused */ + + v = visitor_input_test_init(data, "1"); + visit_type_strList(v, &q, NULL, &err); + error_free_or_abort(&err); + assert(!q); + + v = visitor_input_test_init(data, "[]"); + visit_type_int(v, &i, NULL, &err); + error_free_or_abort(&err); } int main(int argc, char **argv) @@ -843,6 +801,8 @@ int main(int argc, char **argv) &in_visitor_data, test_visitor_in_alternate); input_visitor_test_add("/visitor/input/errors", &in_visitor_data, test_visitor_in_errors); + input_visitor_test_add("/visitor/input/wrong-type", + &in_visitor_data, test_visitor_in_wrong_type); input_visitor_test_add("/visitor/input/alternate-number", &in_visitor_data, test_visitor_in_alternate_number); input_visitor_test_add("/visitor/input/native_list/int", diff --git a/tests/test-qmp-output-visitor.c b/tests/test-qmp-output-visitor.c index c84002e2f2..0d0c85989a 100644 --- a/tests/test-qmp-output-visitor.c +++ b/tests/test-qmp-output-visitor.c @@ -45,11 +45,9 @@ static void test_visitor_out_int(TestOutputVisitorData *data, const void *unused) { int64_t value = -42; - Error *err = NULL; QObject *obj; - visit_type_int(data->ov, &value, NULL, &err); - g_assert(!err); + visit_type_int(data->ov, &value, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -62,12 +60,10 @@ static void test_visitor_out_int(TestOutputVisitorData *data, static void test_visitor_out_bool(TestOutputVisitorData *data, const void *unused) { - Error *err = NULL; bool value = true; QObject *obj; - visit_type_bool(data->ov, &value, NULL, &err); - g_assert(!err); + visit_type_bool(data->ov, &value, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -81,11 +77,9 @@ static void test_visitor_out_number(TestOutputVisitorData *data, const void *unused) { double value = 3.14; - Error *err = NULL; QObject *obj; - visit_type_number(data->ov, &value, NULL, &err); - g_assert(!err); + visit_type_number(data->ov, &value, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -99,11 +93,9 @@ static void test_visitor_out_string(TestOutputVisitorData *data, const void *unused) { char *string = (char *) "Q E M U"; - Error *err = NULL; QObject *obj; - visit_type_str(data->ov, &string, NULL, &err); - g_assert(!err); + visit_type_str(data->ov, &string, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -117,12 +109,10 @@ static void test_visitor_out_no_string(TestOutputVisitorData *data, const void *unused) { char *string = NULL; - Error *err = NULL; QObject *obj; /* A null string should return "" */ - visit_type_str(data->ov, &string, NULL, &err); - g_assert(!err); + visit_type_str(data->ov, &string, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -135,13 +125,11 @@ static void test_visitor_out_no_string(TestOutputVisitorData *data, static void test_visitor_out_enum(TestOutputVisitorData *data, const void *unused) { - Error *err = NULL; QObject *obj; EnumOne i; for (i = 0; i < ENUM_ONE_MAX; i++) { - visit_type_EnumOne(data->ov, &i, "unused", &err); - g_assert(!err); + visit_type_EnumOne(data->ov, &i, "unused", &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -166,41 +154,6 @@ static void test_visitor_out_enum_errors(TestOutputVisitorData *data, } } -typedef struct TestStruct -{ - int64_t integer; - bool boolean; - char *string; -} TestStruct; - -static void visit_type_TestStruct(Visitor *v, TestStruct **obj, - const char *name, Error **errp) -{ - Error *err = NULL; - - visit_start_struct(v, (void **)obj, "TestStruct", name, sizeof(TestStruct), - &err); - if (err) { - goto out; - } - - visit_type_int(v, &(*obj)->integer, "integer", &err); - if (err) { - goto out_end; - } - visit_type_bool(v, &(*obj)->boolean, "boolean", &err); - if (err) { - goto out_end; - } - visit_type_str(v, &(*obj)->string, "string", &err); - -out_end: - error_propagate(errp, err); - err = NULL; - visit_end_struct(v, &err); -out: - error_propagate(errp, err); -} static void test_visitor_out_struct(TestOutputVisitorData *data, const void *unused) @@ -209,12 +162,10 @@ static void test_visitor_out_struct(TestOutputVisitorData *data, .boolean = false, .string = (char *) "foo"}; TestStruct *p = &test_struct; - Error *err = NULL; QObject *obj; QDict *qdict; - visit_type_TestStruct(data->ov, &p, NULL, &err); - g_assert(!err); + visit_type_TestStruct(data->ov, &p, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -233,7 +184,6 @@ static void test_visitor_out_struct_nested(TestOutputVisitorData *data, const void *unused) { int64_t value = 42; - Error *err = NULL; UserDefTwo *ud2; QObject *obj; QDict *qdict, *dict1, *dict2, *dict3, *userdef; @@ -250,20 +200,17 @@ static void test_visitor_out_struct_nested(TestOutputVisitorData *data, ud2->dict1->dict2 = g_malloc0(sizeof(*ud2->dict1->dict2)); ud2->dict1->dict2->userdef = g_new0(UserDefOne, 1); ud2->dict1->dict2->userdef->string = g_strdup(string); - ud2->dict1->dict2->userdef->base = g_new0(UserDefZero, 1); - ud2->dict1->dict2->userdef->base->integer = value; + ud2->dict1->dict2->userdef->integer = value; ud2->dict1->dict2->string = g_strdup(strings[2]); ud2->dict1->dict3 = g_malloc0(sizeof(*ud2->dict1->dict3)); ud2->dict1->has_dict3 = true; ud2->dict1->dict3->userdef = g_new0(UserDefOne, 1); ud2->dict1->dict3->userdef->string = g_strdup(string); - ud2->dict1->dict3->userdef->base = g_new0(UserDefZero, 1); - ud2->dict1->dict3->userdef->base->integer = value; + ud2->dict1->dict3->userdef->integer = value; ud2->dict1->dict3->string = g_strdup(strings[3]); - visit_type_UserDefTwo(data->ov, &ud2, "unused", &err); - g_assert(!err); + visit_type_UserDefTwo(data->ov, &ud2, "unused", &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -301,8 +248,8 @@ static void test_visitor_out_struct_errors(TestOutputVisitorData *data, const void *unused) { EnumOne bad_values[] = { ENUM_ONE_MAX, -1 }; - UserDefZero b; - UserDefOne u = { .base = &b }, *pu = &u; + UserDefOne u = {0}; + UserDefOne *pu = &u; Error *err; int i; @@ -316,57 +263,33 @@ static void test_visitor_out_struct_errors(TestOutputVisitorData *data, } } -typedef struct TestStructList -{ - union { - TestStruct *value; - uint64_t padding; - }; - struct TestStructList *next; -} TestStructList; - -static void visit_type_TestStructList(Visitor *v, TestStructList **obj, - const char *name, Error **errp) -{ - GenericList *i, **head = (GenericList **)obj; - - visit_start_list(v, name, errp); - - for (*head = i = visit_next_list(v, head, errp); i; i = visit_next_list(v, &i, errp)) { - TestStructList *native_i = (TestStructList *)i; - visit_type_TestStruct(v, &native_i->value, NULL, errp); - } - - visit_end_list(v, errp); -} static void test_visitor_out_list(TestOutputVisitorData *data, const void *unused) { - char *value_str = (char *) "list value"; + const char *value_str = "list value"; TestStructList *p, *head = NULL; const int max_items = 10; bool value_bool = true; int value_int = 10; - Error *err = NULL; QListEntry *entry; QObject *obj; QList *qlist; int i; + /* Build the list in reverse order... */ for (i = 0; i < max_items; i++) { p = g_malloc0(sizeof(*p)); p->value = g_malloc0(sizeof(*p->value)); - p->value->integer = value_int; + p->value->integer = value_int + (max_items - i - 1); p->value->boolean = value_bool; - p->value->string = value_str; + p->value->string = g_strdup(value_str); p->next = head; head = p; } - visit_type_TestStructList(data->ov, &head, NULL, &err); - g_assert(!err); + visit_type_TestStructList(data->ov, &head, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); @@ -375,6 +298,7 @@ static void test_visitor_out_list(TestOutputVisitorData *data, qlist = qobject_to_qlist(obj); g_assert(!qlist_empty(qlist)); + /* ...and ensure that the visitor sees it in order */ i = 0; QLIST_FOREACH_ENTRY(qlist, entry) { QDict *qdict; @@ -382,7 +306,7 @@ static void test_visitor_out_list(TestOutputVisitorData *data, g_assert(qobject_type(entry->value) == QTYPE_QDICT); qdict = qobject_to_qdict(entry->value); g_assert_cmpint(qdict_size(qdict), ==, 3); - g_assert_cmpint(qdict_get_int(qdict, "integer"), ==, value_int); + g_assert_cmpint(qdict_get_int(qdict, "integer"), ==, value_int + i); g_assert_cmpint(qdict_get_bool(qdict, "boolean"), ==, value_bool); g_assert_cmpstr(qdict_get_str(qdict, "string"), ==, value_str); i++; @@ -390,13 +314,7 @@ static void test_visitor_out_list(TestOutputVisitorData *data, g_assert_cmpint(i, ==, max_items); QDECREF(qlist); - - for (p = head; p;) { - TestStructList *tmp = p->next; - g_free(p->value); - g_free(p); - p = tmp; - } + qapi_free_TestStructList(head); } static void test_visitor_out_list_qapi_free(TestOutputVisitorData *data, @@ -416,8 +334,7 @@ static void test_visitor_out_list_qapi_free(TestOutputVisitorData *data, p->value->dict1->dict2 = g_new0(UserDefTwoDictDict, 1); p->value->dict1->dict2->userdef = g_new0(UserDefOne, 1); p->value->dict1->dict2->userdef->string = g_strdup(string); - p->value->dict1->dict2->userdef->base = g_new0(UserDefZero, 1); - p->value->dict1->dict2->userdef->base->integer = 42; + p->value->dict1->dict2->userdef->integer = 42; p->value->dict1->dict2->string = g_strdup(string); p->value->dict1->has_dict3 = false; @@ -432,7 +349,6 @@ static void test_visitor_out_any(TestOutputVisitorData *data, const void *unused) { QObject *qobj; - Error *err = NULL; QInt *qint; QBool *qbool; QString *qstring; @@ -440,8 +356,7 @@ static void test_visitor_out_any(TestOutputVisitorData *data, QObject *obj; qobj = QOBJECT(qint_from_int(-42)); - visit_type_any(data->ov, &qobj, NULL, &err); - g_assert(!err); + visit_type_any(data->ov, &qobj, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); g_assert(qobject_type(obj) == QTYPE_QINT); @@ -454,8 +369,8 @@ static void test_visitor_out_any(TestOutputVisitorData *data, qdict_put(qdict, "boolean", qbool_from_bool(true)); qdict_put(qdict, "string", qstring_from_str("foo")); qobj = QOBJECT(qdict); - visit_type_any(data->ov, &qobj, NULL, &err); - g_assert(!err); + visit_type_any(data->ov, &qobj, NULL, &error_abort); + qobject_decref(qobj); obj = qmp_output_get_qobject(data->qov); g_assert(obj != NULL); qdict = qobject_to_qdict(obj); @@ -476,7 +391,6 @@ static void test_visitor_out_any(TestOutputVisitorData *data, g_assert(qstring); g_assert_cmpstr(qstring_get_str(qstring), ==, "foo"); qobject_decref(obj); - qobject_decref(qobj); } static void test_visitor_out_union_flat(TestOutputVisitorData *data, @@ -485,17 +399,14 @@ static void test_visitor_out_union_flat(TestOutputVisitorData *data, QObject *arg; QDict *qdict; - Error *err = NULL; - UserDefFlatUnion *tmp = g_malloc0(sizeof(UserDefFlatUnion)); tmp->enum1 = ENUM_ONE_VALUE1; tmp->string = g_strdup("str"); - tmp->value1 = g_malloc0(sizeof(UserDefA)); - /* TODO when generator bug is fixed: tmp->integer = 41; */ - tmp->value1->boolean = true; + tmp->u.value1 = g_malloc0(sizeof(UserDefA)); + tmp->integer = 41; + tmp->u.value1->boolean = true; - visit_type_UserDefFlatUnion(data->ov, &tmp, NULL, &err); - g_assert(err == NULL); + visit_type_UserDefFlatUnion(data->ov, &tmp, NULL, &error_abort); arg = qmp_output_get_qobject(data->qov); g_assert(qobject_type(arg) == QTYPE_QDICT); @@ -503,7 +414,7 @@ static void test_visitor_out_union_flat(TestOutputVisitorData *data, g_assert_cmpstr(qdict_get_str(qdict, "enum1"), ==, "value1"); g_assert_cmpstr(qdict_get_str(qdict, "string"), ==, "str"); - /* TODO g_assert_cmpint(qdict_get_int(qdict, "integer"), ==, 41); */ + g_assert_cmpint(qdict_get_int(qdict, "integer"), ==, 41); g_assert_cmpint(qdict_get_bool(qdict, "boolean"), ==, true); qapi_free_UserDefFlatUnion(tmp); @@ -514,20 +425,33 @@ static void test_visitor_out_alternate(TestOutputVisitorData *data, const void *unused) { QObject *arg; - Error *err = NULL; + UserDefAlternate *tmp; - UserDefAlternate *tmp = g_malloc0(sizeof(UserDefAlternate)); - tmp->kind = USER_DEF_ALTERNATE_KIND_I; - tmp->i = 42; + tmp = g_new0(UserDefAlternate, 1); + tmp->type = USER_DEF_ALTERNATE_KIND_I; + tmp->u.i = 42; - visit_type_UserDefAlternate(data->ov, &tmp, NULL, &err); - g_assert(err == NULL); + visit_type_UserDefAlternate(data->ov, &tmp, NULL, &error_abort); arg = qmp_output_get_qobject(data->qov); g_assert(qobject_type(arg) == QTYPE_QINT); g_assert_cmpint(qint_get_int(qobject_to_qint(arg)), ==, 42); qapi_free_UserDefAlternate(tmp); + qobject_decref(arg); + + tmp = g_new0(UserDefAlternate, 1); + tmp->type = USER_DEF_ALTERNATE_KIND_S; + tmp->u.s = g_strdup("hello"); + + visit_type_UserDefAlternate(data->ov, &tmp, NULL, &error_abort); + arg = qmp_output_get_qobject(data->qov); + + g_assert(qobject_type(arg) == QTYPE_QSTRING); + g_assert_cmpstr(qstring_get_str(qobject_to_qstring(arg)), ==, "hello"); + + qapi_free_UserDefAlternate(tmp); + qobject_decref(arg); } static void test_visitor_out_empty(TestOutputVisitorData *data, @@ -543,9 +467,9 @@ static void test_visitor_out_empty(TestOutputVisitorData *data, static void init_native_list(UserDefNativeListUnion *cvalue) { int i; - switch (cvalue->kind) { + switch (cvalue->type) { case USER_DEF_NATIVE_LIST_UNION_KIND_INTEGER: { - intList **list = &cvalue->integer; + intList **list = &cvalue->u.integer; for (i = 0; i < 32; i++) { *list = g_new0(intList, 1); (*list)->value = i; @@ -555,7 +479,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_S8: { - int8List **list = &cvalue->s8; + int8List **list = &cvalue->u.s8; for (i = 0; i < 32; i++) { *list = g_new0(int8List, 1); (*list)->value = i; @@ -565,7 +489,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_S16: { - int16List **list = &cvalue->s16; + int16List **list = &cvalue->u.s16; for (i = 0; i < 32; i++) { *list = g_new0(int16List, 1); (*list)->value = i; @@ -575,7 +499,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_S32: { - int32List **list = &cvalue->s32; + int32List **list = &cvalue->u.s32; for (i = 0; i < 32; i++) { *list = g_new0(int32List, 1); (*list)->value = i; @@ -585,7 +509,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_S64: { - int64List **list = &cvalue->s64; + int64List **list = &cvalue->u.s64; for (i = 0; i < 32; i++) { *list = g_new0(int64List, 1); (*list)->value = i; @@ -595,7 +519,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_U8: { - uint8List **list = &cvalue->u8; + uint8List **list = &cvalue->u.u8; for (i = 0; i < 32; i++) { *list = g_new0(uint8List, 1); (*list)->value = i; @@ -605,7 +529,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_U16: { - uint16List **list = &cvalue->u16; + uint16List **list = &cvalue->u.u16; for (i = 0; i < 32; i++) { *list = g_new0(uint16List, 1); (*list)->value = i; @@ -615,7 +539,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_U32: { - uint32List **list = &cvalue->u32; + uint32List **list = &cvalue->u.u32; for (i = 0; i < 32; i++) { *list = g_new0(uint32List, 1); (*list)->value = i; @@ -625,7 +549,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_U64: { - uint64List **list = &cvalue->u64; + uint64List **list = &cvalue->u.u64; for (i = 0; i < 32; i++) { *list = g_new0(uint64List, 1); (*list)->value = i; @@ -635,7 +559,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_BOOLEAN: { - boolList **list = &cvalue->boolean; + boolList **list = &cvalue->u.boolean; for (i = 0; i < 32; i++) { *list = g_new0(boolList, 1); (*list)->value = (i % 3 == 0); @@ -645,7 +569,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_STRING: { - strList **list = &cvalue->string; + strList **list = &cvalue->u.string; for (i = 0; i < 32; i++) { *list = g_new0(strList, 1); (*list)->value = g_strdup_printf("%d", i); @@ -655,7 +579,7 @@ static void init_native_list(UserDefNativeListUnion *cvalue) break; } case USER_DEF_NATIVE_LIST_UNION_KIND_NUMBER: { - numberList **list = &cvalue->number; + numberList **list = &cvalue->u.number; for (i = 0; i < 32; i++) { *list = g_new0(numberList, 1); (*list)->value = (double)i / 3; @@ -761,17 +685,15 @@ static void test_native_list(TestOutputVisitorData *data, UserDefNativeListUnionKind kind) { UserDefNativeListUnion *cvalue = g_new0(UserDefNativeListUnion, 1); - Error *err = NULL; QObject *obj; - cvalue->kind = kind; + cvalue->type = kind; init_native_list(cvalue); - visit_type_UserDefNativeListUnion(data->ov, &cvalue, NULL, &err); - g_assert(err == NULL); + visit_type_UserDefNativeListUnion(data->ov, &cvalue, NULL, &error_abort); obj = qmp_output_get_qobject(data->qov); - check_native_list(obj, cvalue->kind); + check_native_list(obj, cvalue->type); qapi_free_UserDefNativeListUnion(cvalue); qobject_decref(obj); } diff --git a/tests/test-timed-average.c b/tests/test-timed-average.c new file mode 100644 index 0000000000..a049799b80 --- /dev/null +++ b/tests/test-timed-average.c @@ -0,0 +1,90 @@ +/* + * Timed average computation tests + * + * Copyright Nodalink, EURL. 2014 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include <glib.h> +#include <unistd.h> + +#include "qemu/timed-average.h" + +/* This is the clock for QEMU_CLOCK_VIRTUAL */ +static int64_t my_clock_value; + +int64_t cpu_get_clock(void) +{ + return my_clock_value; +} + +static void account(TimedAverage *ta) +{ + timed_average_account(ta, 1); + timed_average_account(ta, 5); + timed_average_account(ta, 2); + timed_average_account(ta, 4); + timed_average_account(ta, 3); +} + +static void test_average(void) +{ + TimedAverage ta; + uint64_t result; + int i; + + /* we will compute some average on a period of 1 second */ + timed_average_init(&ta, QEMU_CLOCK_VIRTUAL, NANOSECONDS_PER_SECOND); + + result = timed_average_min(&ta); + g_assert(result == 0); + result = timed_average_avg(&ta); + g_assert(result == 0); + result = timed_average_max(&ta); + g_assert(result == 0); + + for (i = 0; i < 100; i++) { + account(&ta); + result = timed_average_min(&ta); + g_assert(result == 1); + result = timed_average_avg(&ta); + g_assert(result == 3); + result = timed_average_max(&ta); + g_assert(result == 5); + my_clock_value += NANOSECONDS_PER_SECOND / 10; + } + + my_clock_value += NANOSECONDS_PER_SECOND * 100; + + result = timed_average_min(&ta); + g_assert(result == 0); + result = timed_average_avg(&ta); + g_assert(result == 0); + result = timed_average_max(&ta); + g_assert(result == 0); + + for (i = 0; i < 100; i++) { + account(&ta); + result = timed_average_min(&ta); + g_assert(result == 1); + result = timed_average_avg(&ta); + g_assert(result == 3); + result = timed_average_max(&ta); + g_assert(result == 5); + my_clock_value += NANOSECONDS_PER_SECOND / 10; + } +} + +int main(int argc, char **argv) +{ + /* tests in the same order as the header function declarations */ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/timed-average/average", test_average); + return g_test_run(); +} + diff --git a/tests/test-visitor-serialization.c b/tests/test-visitor-serialization.c index fa86cae88a..9f67f9e003 100644 --- a/tests/test-visitor-serialization.c +++ b/tests/test-visitor-serialization.c @@ -186,40 +186,6 @@ static void visit_primitive_list(Visitor *v, void **native, Error **errp) } } -typedef struct TestStruct -{ - int64_t integer; - bool boolean; - char *string; -} TestStruct; - -static void visit_type_TestStruct(Visitor *v, TestStruct **obj, - const char *name, Error **errp) -{ - Error *err = NULL; - - visit_start_struct(v, (void **)obj, NULL, name, sizeof(TestStruct), &err); - if (err) { - goto out; - } - - visit_type_int(v, &(*obj)->integer, "integer", &err); - if (err) { - goto out_end; - } - visit_type_bool(v, &(*obj)->boolean, "boolean", &err); - if (err) { - goto out_end; - } - visit_type_str(v, &(*obj)->string, "string", &err); - -out_end: - error_propagate(errp, err); - err = NULL; - visit_end_struct(v, &err); -out: - error_propagate(errp, err); -} static TestStruct *struct_create(void) { @@ -258,15 +224,13 @@ static UserDefTwo *nested_struct_create(void) udnp->dict1->string1 = strdup("test_string1"); udnp->dict1->dict2 = g_malloc0(sizeof(*udnp->dict1->dict2)); udnp->dict1->dict2->userdef = g_new0(UserDefOne, 1); - udnp->dict1->dict2->userdef->base = g_new0(UserDefZero, 1); - udnp->dict1->dict2->userdef->base->integer = 42; + udnp->dict1->dict2->userdef->integer = 42; udnp->dict1->dict2->userdef->string = strdup("test_string"); udnp->dict1->dict2->string = strdup("test_string2"); udnp->dict1->dict3 = g_malloc0(sizeof(*udnp->dict1->dict3)); udnp->dict1->has_dict3 = true; udnp->dict1->dict3->userdef = g_new0(UserDefOne, 1); - udnp->dict1->dict3->userdef->base = g_new0(UserDefZero, 1); - udnp->dict1->dict3->userdef->base->integer = 43; + udnp->dict1->dict3->userdef->integer = 43; udnp->dict1->dict3->userdef->string = strdup("test_string"); udnp->dict1->dict3->string = strdup("test_string3"); return udnp; @@ -278,15 +242,15 @@ static void nested_struct_compare(UserDefTwo *udnp1, UserDefTwo *udnp2) g_assert(udnp2); g_assert_cmpstr(udnp1->string0, ==, udnp2->string0); g_assert_cmpstr(udnp1->dict1->string1, ==, udnp2->dict1->string1); - g_assert_cmpint(udnp1->dict1->dict2->userdef->base->integer, ==, - udnp2->dict1->dict2->userdef->base->integer); + g_assert_cmpint(udnp1->dict1->dict2->userdef->integer, ==, + udnp2->dict1->dict2->userdef->integer); g_assert_cmpstr(udnp1->dict1->dict2->userdef->string, ==, udnp2->dict1->dict2->userdef->string); g_assert_cmpstr(udnp1->dict1->dict2->string, ==, udnp2->dict1->dict2->string); g_assert(udnp1->dict1->has_dict3 == udnp2->dict1->has_dict3); - g_assert_cmpint(udnp1->dict1->dict3->userdef->base->integer, ==, - udnp2->dict1->dict3->userdef->base->integer); + g_assert_cmpint(udnp1->dict1->dict3->userdef->integer, ==, + udnp2->dict1->dict3->userdef->integer); g_assert_cmpstr(udnp1->dict1->dict3->userdef->string, ==, udnp2->dict1->dict3->userdef->string); g_assert_cmpstr(udnp1->dict1->dict3->string, ==, @@ -338,14 +302,13 @@ static void test_primitives(gconstpointer opaque) const SerializeOps *ops = args->ops; PrimitiveType *pt = args->test_data; PrimitiveType *pt_copy = g_malloc0(sizeof(*pt_copy)); - Error *err = NULL; void *serialize_data; pt_copy->type = pt->type; - ops->serialize(pt, &serialize_data, visit_primitive_type, &err); - ops->deserialize((void **)&pt_copy, serialize_data, visit_primitive_type, &err); + ops->serialize(pt, &serialize_data, visit_primitive_type, &error_abort); + ops->deserialize((void **)&pt_copy, serialize_data, visit_primitive_type, + &error_abort); - g_assert(err == NULL); g_assert(pt_copy != NULL); if (pt->type == PTYPE_STRING) { g_assert_cmpstr(pt->value.string, ==, pt_copy->value.string); @@ -381,7 +344,6 @@ static void test_primitive_lists(gconstpointer opaque) PrimitiveList pl = { .value = { NULL } }; PrimitiveList pl_copy = { .value = { NULL } }; PrimitiveList *pl_copy_ptr = &pl_copy; - Error *err = NULL; void *serialize_data; void *cur_head = NULL; int i; @@ -528,10 +490,11 @@ static void test_primitive_lists(gconstpointer opaque) } } - ops->serialize((void **)&pl, &serialize_data, visit_primitive_list, &err); - ops->deserialize((void **)&pl_copy_ptr, serialize_data, visit_primitive_list, &err); + ops->serialize((void **)&pl, &serialize_data, visit_primitive_list, + &error_abort); + ops->deserialize((void **)&pl_copy_ptr, serialize_data, + visit_primitive_list, &error_abort); - g_assert(err == NULL); i = 0; /* compare our deserialized list of primitives to the original */ @@ -688,10 +651,8 @@ static void test_primitive_lists(gconstpointer opaque) g_assert_cmpint(i, ==, 33); ops->cleanup(serialize_data); - dealloc_helper(&pl, visit_primitive_list, &err); - g_assert(!err); - dealloc_helper(&pl_copy, visit_primitive_list, &err); - g_assert(!err); + dealloc_helper(&pl, visit_primitive_list, &error_abort); + dealloc_helper(&pl_copy, visit_primitive_list, &error_abort); g_free(args); } @@ -701,13 +662,12 @@ static void test_struct(gconstpointer opaque) const SerializeOps *ops = args->ops; TestStruct *ts = struct_create(); TestStruct *ts_copy = NULL; - Error *err = NULL; void *serialize_data; - ops->serialize(ts, &serialize_data, visit_struct, &err); - ops->deserialize((void **)&ts_copy, serialize_data, visit_struct, &err); + ops->serialize(ts, &serialize_data, visit_struct, &error_abort); + ops->deserialize((void **)&ts_copy, serialize_data, visit_struct, + &error_abort); - g_assert(err == NULL); struct_compare(ts, ts_copy); struct_cleanup(ts); @@ -723,14 +683,12 @@ static void test_nested_struct(gconstpointer opaque) const SerializeOps *ops = args->ops; UserDefTwo *udnp = nested_struct_create(); UserDefTwo *udnp_copy = NULL; - Error *err = NULL; void *serialize_data; - ops->serialize(udnp, &serialize_data, visit_nested_struct, &err); + ops->serialize(udnp, &serialize_data, visit_nested_struct, &error_abort); ops->deserialize((void **)&udnp_copy, serialize_data, visit_nested_struct, - &err); + &error_abort); - g_assert(err == NULL); nested_struct_compare(udnp, udnp_copy); nested_struct_cleanup(udnp); @@ -745,7 +703,6 @@ static void test_nested_struct_list(gconstpointer opaque) TestArgs *args = (TestArgs *) opaque; const SerializeOps *ops = args->ops; UserDefTwoList *listp = NULL, *tmp, *tmp_copy, *listp_copy = NULL; - Error *err = NULL; void *serialize_data; int i = 0; @@ -756,11 +713,10 @@ static void test_nested_struct_list(gconstpointer opaque) listp = tmp; } - ops->serialize(listp, &serialize_data, visit_nested_struct_list, &err); + ops->serialize(listp, &serialize_data, visit_nested_struct_list, + &error_abort); ops->deserialize((void **)&listp_copy, serialize_data, - visit_nested_struct_list, &err); - - g_assert(err == NULL); + visit_nested_struct_list, &error_abort); tmp = listp; tmp_copy = listp_copy; diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c new file mode 100644 index 0000000000..9fb09f1df4 --- /dev/null +++ b/tests/vhost-user-bridge.c @@ -0,0 +1,1398 @@ +/* + * Vhost User Bridge + * + * Copyright (c) 2015 Red Hat, Inc. + * + * Authors: + * Victor Kaplansky <victork@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +/* + * TODO: + * - main should get parameters from the command line. + * - implement all request handlers. Still not implemented: + * vubr_get_queue_num_exec() + * vubr_send_rarp_exec() + * - test for broken requests and virtqueue. + * - implement features defined by Virtio 1.0 spec. + * - support mergeable buffers and indirect descriptors. + * - implement clean shutdown. + * - implement non-blocking writes to UDP backend. + * - implement polling strategy. + * - implement clean starting/stopping of vq processing + * - implement clean starting/stopping of used and buffers + * dirty page logging. + */ + +#define _FILE_OFFSET_BITS 64 + +#include <stddef.h> +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/unistd.h> +#include <sys/mman.h> +#include <sys/eventfd.h> +#include <arpa/inet.h> +#include <ctype.h> +#include <netdb.h> + +#include <linux/vhost.h> + +#include "qemu/atomic.h" +#include "standard-headers/linux/virtio_net.h" +#include "standard-headers/linux/virtio_ring.h" + +#define VHOST_USER_BRIDGE_DEBUG 1 + +#define DPRINT(...) \ + do { \ + if (VHOST_USER_BRIDGE_DEBUG) { \ + printf(__VA_ARGS__); \ + } \ + } while (0) + +typedef void (*CallbackFunc)(int sock, void *ctx); + +typedef struct Event { + void *ctx; + CallbackFunc callback; +} Event; + +typedef struct Dispatcher { + int max_sock; + fd_set fdset; + Event events[FD_SETSIZE]; +} Dispatcher; + +static void +vubr_die(const char *s) +{ + perror(s); + exit(1); +} + +static int +dispatcher_init(Dispatcher *dispr) +{ + FD_ZERO(&dispr->fdset); + dispr->max_sock = -1; + return 0; +} + +static int +dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb) +{ + if (sock >= FD_SETSIZE) { + fprintf(stderr, + "Error: Failed to add new event. sock %d should be less than %d\n", + sock, FD_SETSIZE); + return -1; + } + + dispr->events[sock].ctx = ctx; + dispr->events[sock].callback = cb; + + FD_SET(sock, &dispr->fdset); + if (sock > dispr->max_sock) { + dispr->max_sock = sock; + } + DPRINT("Added sock %d for watching. max_sock: %d\n", + sock, dispr->max_sock); + return 0; +} + +/* dispatcher_remove() is not currently in use but may be useful + * in the future. */ +static int +dispatcher_remove(Dispatcher *dispr, int sock) +{ + if (sock >= FD_SETSIZE) { + fprintf(stderr, + "Error: Failed to remove event. sock %d should be less than %d\n", + sock, FD_SETSIZE); + return -1; + } + + FD_CLR(sock, &dispr->fdset); + DPRINT("Sock %d removed from dispatcher watch.\n", sock); + return 0; +} + +/* timeout in us */ +static int +dispatcher_wait(Dispatcher *dispr, uint32_t timeout) +{ + struct timeval tv; + tv.tv_sec = timeout / 1000000; + tv.tv_usec = timeout % 1000000; + + fd_set fdset = dispr->fdset; + + /* wait until some of sockets become readable. */ + int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv); + + if (rc == -1) { + vubr_die("select"); + } + + /* Timeout */ + if (rc == 0) { + return 0; + } + + /* Now call callback for every ready socket. */ + + int sock; + for (sock = 0; sock < dispr->max_sock + 1; sock++) { + /* The callback on a socket can remove other sockets from the + * dispatcher, thus we have to check that the socket is + * still not removed from dispatcher's list + */ + if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) { + Event *e = &dispr->events[sock]; + e->callback(sock, e->ctx); + } + } + + return 0; +} + +typedef struct VubrVirtq { + int call_fd; + int kick_fd; + uint32_t size; + uint16_t last_avail_index; + uint16_t last_used_index; + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint64_t log_guest_addr; + int enable; +} VubrVirtq; + +/* Based on qemu/hw/virtio/vhost-user.c */ + +#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + +#define VHOST_LOG_PAGE 4096 + +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + + VHOST_USER_PROTOCOL_F_MAX +}; + +#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK (0x3) +#define VHOST_USER_REPLY_MASK (0x1<<2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK (0xff) +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserLog log; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fd_num; +} QEMU_PACKED VhostUserMsg; + +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION (0x1) + +#define MAX_NR_VIRTQUEUE (8) + +typedef struct VubrDevRegion { + /* Guest Physical address. */ + uint64_t gpa; + /* Memory region size. */ + uint64_t size; + /* QEMU virtual address (userspace). */ + uint64_t qva; + /* Starting offset in our mmaped space. */ + uint64_t mmap_offset; + /* Start address of mmaped space. */ + uint64_t mmap_addr; +} VubrDevRegion; + +typedef struct VubrDev { + int sock; + Dispatcher dispatcher; + uint32_t nregions; + VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; + VubrVirtq vq[MAX_NR_VIRTQUEUE]; + int log_call_fd; + uint64_t log_size; + uint8_t *log_table; + int backend_udp_sock; + struct sockaddr_in backend_udp_dest; + int ready; + uint64_t features; +} VubrDev; + +static const char *vubr_request_str[] = { + [VHOST_USER_NONE] = "VHOST_USER_NONE", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", + [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", + [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", + [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", + [VHOST_USER_MAX] = "VHOST_USER_MAX", +}; + +static void +print_buffer(uint8_t *buf, size_t len) +{ + int i; + printf("Raw buffer:\n"); + for (i = 0; i < len; i++) { + if (i % 16 == 0) { + printf("\n"); + } + if (i % 4 == 0) { + printf(" "); + } + printf("%02x ", buf[i]); + } + printf("\n............................................................\n"); +} + +/* Translate guest physical address to our virtual address. */ +static uint64_t +gpa_to_va(VubrDev *dev, uint64_t guest_addr) +{ + int i; + + /* Find matching memory region. */ + for (i = 0; i < dev->nregions; i++) { + VubrDevRegion *r = &dev->regions[i]; + + if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { + return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset; + } + } + + assert(!"address not found in regions"); + return 0; +} + +/* Translate qemu virtual address to our virtual address. */ +static uint64_t +qva_to_va(VubrDev *dev, uint64_t qemu_addr) +{ + int i; + + /* Find matching memory region. */ + for (i = 0; i < dev->nregions; i++) { + VubrDevRegion *r = &dev->regions[i]; + + if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) { + return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset; + } + } + + assert(!"address not found in regions"); + return 0; +} + +static void +vubr_message_read(int conn_fd, VhostUserMsg *vmsg) +{ + char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; + struct iovec iov = { + .iov_base = (char *)vmsg, + .iov_len = VHOST_USER_HDR_SIZE, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = control, + .msg_controllen = sizeof(control), + }; + size_t fd_size; + struct cmsghdr *cmsg; + int rc; + + rc = recvmsg(conn_fd, &msg, 0); + + if (rc == 0) { + vubr_die("recvmsg"); + fprintf(stderr, "Peer disconnected.\n"); + exit(1); + } + if (rc < 0) { + vubr_die("recvmsg"); + } + + vmsg->fd_num = 0; + for (cmsg = CMSG_FIRSTHDR(&msg); + cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) + { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { + fd_size = cmsg->cmsg_len - CMSG_LEN(0); + vmsg->fd_num = fd_size / sizeof(int); + memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); + break; + } + } + + if (vmsg->size > sizeof(vmsg->payload)) { + fprintf(stderr, + "Error: too big message request: %d, size: vmsg->size: %u, " + "while sizeof(vmsg->payload) = %lu\n", + vmsg->request, vmsg->size, sizeof(vmsg->payload)); + exit(1); + } + + if (vmsg->size) { + rc = read(conn_fd, &vmsg->payload, vmsg->size); + if (rc == 0) { + vubr_die("recvmsg"); + fprintf(stderr, "Peer disconnected.\n"); + exit(1); + } + if (rc < 0) { + vubr_die("recvmsg"); + } + + assert(rc == vmsg->size); + } +} + +static void +vubr_message_write(int conn_fd, VhostUserMsg *vmsg) +{ + int rc; + + do { + rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size); + } while (rc < 0 && errno == EINTR); + + if (rc < 0) { + vubr_die("write"); + } +} + +static void +vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len) +{ + int slen = sizeof(struct sockaddr_in); + + if (sendto(dev->backend_udp_sock, buf, len, 0, + (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) { + vubr_die("sendto()"); + } +} + +static int +vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen) +{ + int slen = sizeof(struct sockaddr_in); + int rc; + + rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0, + (struct sockaddr *) &dev->backend_udp_dest, + (socklen_t *)&slen); + if (rc == -1) { + vubr_die("recvfrom()"); + } + + return rc; +} + +static void +vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len) +{ + int hdrlen = sizeof(struct virtio_net_hdr_v1); + + if (VHOST_USER_BRIDGE_DEBUG) { + print_buffer(buf, len); + } + vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen); +} + +/* Kick the log_call_fd if required. */ +static void +vubr_log_kick(VubrDev *dev) +{ + if (dev->log_call_fd != -1) { + DPRINT("Kicking the QEMU's log...\n"); + eventfd_write(dev->log_call_fd, 1); + } +} + +/* Kick the guest if necessary. */ +static void +vubr_virtqueue_kick(VubrVirtq *vq) +{ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + DPRINT("Kicking the guest...\n"); + eventfd_write(vq->call_fd, 1); + } +} + +static void +vubr_log_page(uint8_t *log_table, uint64_t page) +{ + DPRINT("Logged dirty guest page: %"PRId64"\n", page); + atomic_or(&log_table[page / 8], 1 << (page % 8)); +} + +static void +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) +{ + uint64_t page; + + if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) || + !dev->log_table || !length) { + return; + } + + assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8)); + + page = address / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < address + length) { + vubr_log_page(dev->log_table, page); + page += VHOST_LOG_PAGE; + } + vubr_log_kick(dev); +} + +static void +vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) +{ + struct vring_desc *desc = vq->desc; + struct vring_avail *avail = vq->avail; + struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; + + unsigned int size = vq->size; + + uint16_t avail_index = atomic_mb_read(&avail->idx); + + /* We check the available descriptors before posting the + * buffer, so here we assume that enough available + * descriptors. */ + assert(vq->last_avail_index != avail_index); + uint16_t a_index = vq->last_avail_index % size; + uint16_t u_index = vq->last_used_index % size; + uint16_t d_index = avail->ring[a_index]; + + int i = d_index; + + DPRINT("Post packet to guest on vq:\n"); + DPRINT(" size = %d\n", vq->size); + DPRINT(" last_avail_index = %d\n", vq->last_avail_index); + DPRINT(" last_used_index = %d\n", vq->last_used_index); + DPRINT(" a_index = %d\n", a_index); + DPRINT(" u_index = %d\n", u_index); + DPRINT(" d_index = %d\n", d_index); + DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr); + DPRINT(" desc[%d].len = %d\n", i, desc[i].len); + DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags); + DPRINT(" avail->idx = %d\n", avail_index); + DPRINT(" used->idx = %d\n", used->idx); + + if (!(desc[i].flags & VRING_DESC_F_WRITE)) { + /* FIXME: we should find writable descriptor. */ + fprintf(stderr, "Error: descriptor is not writable. Exiting.\n"); + exit(1); + } + + void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); + uint32_t chunk_len = desc[i].len; + + if (len <= chunk_len) { + memcpy(chunk_start, buf, len); + vubr_log_write(dev, desc[i].addr, len); + } else { + fprintf(stderr, + "Received too long packet from the backend. Dropping...\n"); + return; + } + + /* Add descriptor to the used ring. */ + used->ring[u_index].id = d_index; + used->ring[u_index].len = len; + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, ring[u_index]), + sizeof(used->ring[u_index])); + + vq->last_avail_index++; + vq->last_used_index++; + + atomic_mb_set(&used->idx, vq->last_used_index); + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, idx), + sizeof(used->idx)); + + /* Kick the guest if necessary. */ + vubr_virtqueue_kick(vq); +} + +static int +vubr_process_desc(VubrDev *dev, VubrVirtq *vq) +{ + struct vring_desc *desc = vq->desc; + struct vring_avail *avail = vq->avail; + struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; + + unsigned int size = vq->size; + + uint16_t a_index = vq->last_avail_index % size; + uint16_t u_index = vq->last_used_index % size; + uint16_t d_index = avail->ring[a_index]; + + uint32_t i, len = 0; + size_t buf_size = 4096; + uint8_t buf[4096]; + + DPRINT("Chunks: "); + i = d_index; + do { + void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); + uint32_t chunk_len = desc[i].len; + + assert(!(desc[i].flags & VRING_DESC_F_WRITE)); + + if (len + chunk_len < buf_size) { + memcpy(buf + len, chunk_start, chunk_len); + DPRINT("%d ", chunk_len); + } else { + fprintf(stderr, "Error: too long packet. Dropping...\n"); + break; + } + + len += chunk_len; + + if (!(desc[i].flags & VRING_DESC_F_NEXT)) { + break; + } + + i = desc[i].next; + } while (1); + DPRINT("\n"); + + if (!len) { + return -1; + } + + /* Add descriptor to the used ring. */ + used->ring[u_index].id = d_index; + used->ring[u_index].len = len; + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, ring[u_index]), + sizeof(used->ring[u_index])); + + vubr_consume_raw_packet(dev, buf, len); + + return 0; +} + +static void +vubr_process_avail(VubrDev *dev, VubrVirtq *vq) +{ + struct vring_avail *avail = vq->avail; + struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; + + while (vq->last_avail_index != atomic_mb_read(&avail->idx)) { + vubr_process_desc(dev, vq); + vq->last_avail_index++; + vq->last_used_index++; + } + + atomic_mb_set(&used->idx, vq->last_used_index); + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, idx), + sizeof(used->idx)); +} + +static void +vubr_backend_recv_cb(int sock, void *ctx) +{ + VubrDev *dev = (VubrDev *) ctx; + VubrVirtq *rx_vq = &dev->vq[0]; + uint8_t buf[4096]; + struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf; + int hdrlen = sizeof(struct virtio_net_hdr_v1); + int buflen = sizeof(buf); + int len; + + if (!dev->ready) { + return; + } + + DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); + + uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); + + /* If there is no available descriptors, just do nothing. + * The buffer will be handled by next arrived UDP packet, + * or next kick on receive virtq. */ + if (rx_vq->last_avail_index == avail_index) { + DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n"); + return; + } + + len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen); + + *hdr = (struct virtio_net_hdr_v1) { }; + hdr->num_buffers = 1; + vubr_post_buffer(dev, rx_vq, buf, len + hdrlen); +} + +static void +vubr_kick_cb(int sock, void *ctx) +{ + VubrDev *dev = (VubrDev *) ctx; + eventfd_t kick_data; + ssize_t rc; + + rc = eventfd_read(sock, &kick_data); + if (rc == -1) { + vubr_die("eventfd_read()"); + } else { + DPRINT("Got kick_data: %016"PRIx64"\n", kick_data); + vubr_process_avail(dev, &dev->vq[1]); + } +} + +static int +vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + DPRINT("Function %s() not implemented yet.\n", __func__); + return 0; +} + +static int +vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + vmsg->payload.u64 = + ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); + + vmsg->size = sizeof(vmsg->payload.u64); + + DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + + /* Reply */ + return 1; +} + +static int +vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + dev->features = vmsg->payload.u64; + return 0; +} + +static int +vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + return 0; +} + +static void +vubr_close_log(VubrDev *dev) +{ + if (dev->log_table) { + if (munmap(dev->log_table, dev->log_size) != 0) { + vubr_die("munmap()"); + } + + dev->log_table = 0; + } + if (dev->log_call_fd != -1) { + close(dev->log_call_fd); + dev->log_call_fd = -1; + } +} + +static int +vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + vubr_close_log(dev); + dev->ready = 0; + dev->features = 0; + return 0; +} + +static int +vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + int i; + VhostUserMemory *memory = &vmsg->payload.memory; + dev->nregions = memory->nregions; + + DPRINT("Nregions: %d\n", memory->nregions); + for (i = 0; i < dev->nregions; i++) { + void *mmap_addr; + VhostUserMemoryRegion *msg_region = &memory->regions[i]; + VubrDevRegion *dev_region = &dev->regions[i]; + + DPRINT("Region %d\n", i); + DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", + msg_region->guest_phys_addr); + DPRINT(" memory_size: 0x%016"PRIx64"\n", + msg_region->memory_size); + DPRINT(" userspace_addr 0x%016"PRIx64"\n", + msg_region->userspace_addr); + DPRINT(" mmap_offset 0x%016"PRIx64"\n", + msg_region->mmap_offset); + + dev_region->gpa = msg_region->guest_phys_addr; + dev_region->size = msg_region->memory_size; + dev_region->qva = msg_region->userspace_addr; + dev_region->mmap_offset = msg_region->mmap_offset; + + /* We don't use offset argument of mmap() since the + * mapped address has to be page aligned, and we use huge + * pages. */ + mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, + PROT_READ | PROT_WRITE, MAP_SHARED, + vmsg->fds[i], 0); + + if (mmap_addr == MAP_FAILED) { + vubr_die("mmap"); + } + dev_region->mmap_addr = (uint64_t) mmap_addr; + DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr); + + close(vmsg->fds[i]); + } + + return 0; +} + +static int +vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + int fd; + uint64_t log_mmap_size, log_mmap_offset; + void *rc; + + assert(vmsg->fd_num == 1); + fd = vmsg->fds[0]; + + assert(vmsg->size == sizeof(vmsg->payload.log)); + log_mmap_offset = vmsg->payload.log.mmap_offset; + log_mmap_size = vmsg->payload.log.mmap_size; + DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset); + DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size); + + rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, + log_mmap_offset); + if (rc == MAP_FAILED) { + vubr_die("mmap"); + } + dev->log_table = rc; + dev->log_size = log_mmap_size; + + vmsg->size = sizeof(vmsg->payload.u64); + /* Reply */ + return 1; +} + +static int +vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + assert(vmsg->fd_num == 1); + dev->log_call_fd = vmsg->fds[0]; + DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]); + return 0; +} + +static int +vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + unsigned int index = vmsg->payload.state.index; + unsigned int num = vmsg->payload.state.num; + + DPRINT("State.index: %d\n", index); + DPRINT("State.num: %d\n", num); + dev->vq[index].size = num; + return 0; +} + +static int +vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + struct vhost_vring_addr *vra = &vmsg->payload.addr; + unsigned int index = vra->index; + VubrVirtq *vq = &dev->vq[index]; + + DPRINT("vhost_vring_addr:\n"); + DPRINT(" index: %d\n", vra->index); + DPRINT(" flags: %d\n", vra->flags); + DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr); + DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr); + DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr); + DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr); + + vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr); + vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr); + vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr); + vq->log_guest_addr = vra->log_guest_addr; + + DPRINT("Setting virtq addresses:\n"); + DPRINT(" vring_desc at %p\n", vq->desc); + DPRINT(" vring_used at %p\n", vq->used); + DPRINT(" vring_avail at %p\n", vq->avail); + + vq->last_used_index = vq->used->idx; + return 0; +} + +static int +vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + unsigned int index = vmsg->payload.state.index; + unsigned int num = vmsg->payload.state.num; + + DPRINT("State.index: %d\n", index); + DPRINT("State.num: %d\n", num); + dev->vq[index].last_avail_index = num; + + return 0; +} + +static int +vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + unsigned int index = vmsg->payload.state.index; + + DPRINT("State.index: %d\n", index); + vmsg->payload.state.num = dev->vq[index].last_avail_index; + vmsg->size = sizeof(vmsg->payload.state); + /* FIXME: this is a work-around for a bug in QEMU enabling + * too early vrings. When protocol features are enabled, + * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */ + dev->ready = 0; + + if (dev->vq[index].call_fd != -1) { + close(dev->vq[index].call_fd); + dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd); + dev->vq[index].call_fd = -1; + } + if (dev->vq[index].kick_fd != -1) { + close(dev->vq[index].kick_fd); + dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd); + dev->vq[index].kick_fd = -1; + } + + /* Reply */ + return 1; +} + +static int +vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + uint64_t u64_arg = vmsg->payload.u64; + int index = u64_arg & VHOST_USER_VRING_IDX_MASK; + + DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + + assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); + assert(vmsg->fd_num == 1); + + if (dev->vq[index].kick_fd != -1) { + close(dev->vq[index].kick_fd); + dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd); + } + dev->vq[index].kick_fd = vmsg->fds[0]; + DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index); + + if (index % 2 == 1) { + /* TX queue. */ + dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd, + dev, vubr_kick_cb); + + DPRINT("Waiting for kicks on fd: %d for vq: %d\n", + dev->vq[index].kick_fd, index); + } + /* We temporarily use this hack to determine that both TX and RX + * queues are set up and ready for processing. + * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and + * actual kicks. */ + if (dev->vq[0].kick_fd != -1 && + dev->vq[1].kick_fd != -1) { + dev->ready = 1; + DPRINT("vhost-user-bridge is ready for processing queues.\n"); + } + return 0; + +} + +static int +vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + uint64_t u64_arg = vmsg->payload.u64; + int index = u64_arg & VHOST_USER_VRING_IDX_MASK; + + DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); + assert(vmsg->fd_num == 1); + + if (dev->vq[index].call_fd != -1) { + close(dev->vq[index].call_fd); + dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd); + } + dev->vq[index].call_fd = vmsg->fds[0]; + DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index); + + return 0; +} + +static int +vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + return 0; +} + +static int +vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD; + DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + vmsg->size = sizeof(vmsg->payload.u64); + + /* Reply */ + return 1; +} + +static int +vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + /* FIXME: unimplented */ + DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + return 0; +} + +static int +vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + DPRINT("Function %s() not implemented yet.\n", __func__); + return 0; +} + +static int +vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + unsigned int index = vmsg->payload.state.index; + unsigned int enable = vmsg->payload.state.num; + + DPRINT("State.index: %d\n", index); + DPRINT("State.enable: %d\n", enable); + dev->vq[index].enable = enable; + return 0; +} + +static int +vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg) +{ + DPRINT("Function %s() not implemented yet.\n", __func__); + return 0; +} + +static int +vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg) +{ + /* Print out generic part of the request. */ + DPRINT( + "================== Vhost user message from QEMU ==================\n"); + DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request], + vmsg->request); + DPRINT("Flags: 0x%x\n", vmsg->flags); + DPRINT("Size: %d\n", vmsg->size); + + if (vmsg->fd_num) { + int i; + DPRINT("Fds:"); + for (i = 0; i < vmsg->fd_num; i++) { + DPRINT(" %d", vmsg->fds[i]); + } + DPRINT("\n"); + } + + switch (vmsg->request) { + case VHOST_USER_NONE: + return vubr_none_exec(dev, vmsg); + case VHOST_USER_GET_FEATURES: + return vubr_get_features_exec(dev, vmsg); + case VHOST_USER_SET_FEATURES: + return vubr_set_features_exec(dev, vmsg); + case VHOST_USER_SET_OWNER: + return vubr_set_owner_exec(dev, vmsg); + case VHOST_USER_RESET_OWNER: + return vubr_reset_device_exec(dev, vmsg); + case VHOST_USER_SET_MEM_TABLE: + return vubr_set_mem_table_exec(dev, vmsg); + case VHOST_USER_SET_LOG_BASE: + return vubr_set_log_base_exec(dev, vmsg); + case VHOST_USER_SET_LOG_FD: + return vubr_set_log_fd_exec(dev, vmsg); + case VHOST_USER_SET_VRING_NUM: + return vubr_set_vring_num_exec(dev, vmsg); + case VHOST_USER_SET_VRING_ADDR: + return vubr_set_vring_addr_exec(dev, vmsg); + case VHOST_USER_SET_VRING_BASE: + return vubr_set_vring_base_exec(dev, vmsg); + case VHOST_USER_GET_VRING_BASE: + return vubr_get_vring_base_exec(dev, vmsg); + case VHOST_USER_SET_VRING_KICK: + return vubr_set_vring_kick_exec(dev, vmsg); + case VHOST_USER_SET_VRING_CALL: + return vubr_set_vring_call_exec(dev, vmsg); + case VHOST_USER_SET_VRING_ERR: + return vubr_set_vring_err_exec(dev, vmsg); + case VHOST_USER_GET_PROTOCOL_FEATURES: + return vubr_get_protocol_features_exec(dev, vmsg); + case VHOST_USER_SET_PROTOCOL_FEATURES: + return vubr_set_protocol_features_exec(dev, vmsg); + case VHOST_USER_GET_QUEUE_NUM: + return vubr_get_queue_num_exec(dev, vmsg); + case VHOST_USER_SET_VRING_ENABLE: + return vubr_set_vring_enable_exec(dev, vmsg); + case VHOST_USER_SEND_RARP: + return vubr_send_rarp_exec(dev, vmsg); + + case VHOST_USER_MAX: + assert(vmsg->request != VHOST_USER_MAX); + } + return 0; +} + +static void +vubr_receive_cb(int sock, void *ctx) +{ + VubrDev *dev = (VubrDev *) ctx; + VhostUserMsg vmsg; + int reply_requested; + + vubr_message_read(sock, &vmsg); + reply_requested = vubr_execute_request(dev, &vmsg); + if (reply_requested) { + /* Set the version in the flags when sending the reply */ + vmsg.flags &= ~VHOST_USER_VERSION_MASK; + vmsg.flags |= VHOST_USER_VERSION; + vmsg.flags |= VHOST_USER_REPLY_MASK; + vubr_message_write(sock, &vmsg); + } +} + +static void +vubr_accept_cb(int sock, void *ctx) +{ + VubrDev *dev = (VubrDev *)ctx; + int conn_fd; + struct sockaddr_un un; + socklen_t len = sizeof(un); + + conn_fd = accept(sock, (struct sockaddr *) &un, &len); + if (conn_fd == -1) { + vubr_die("accept()"); + } + DPRINT("Got connection from remote peer on sock %d\n", conn_fd); + dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb); +} + +static VubrDev * +vubr_new(const char *path) +{ + VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev)); + dev->nregions = 0; + int i; + struct sockaddr_un un; + size_t len; + + for (i = 0; i < MAX_NR_VIRTQUEUE; i++) { + dev->vq[i] = (VubrVirtq) { + .call_fd = -1, .kick_fd = -1, + .size = 0, + .last_avail_index = 0, .last_used_index = 0, + .desc = 0, .avail = 0, .used = 0, + .enable = 0, + }; + } + + /* Init log */ + dev->log_call_fd = -1; + dev->log_size = 0; + dev->log_table = 0; + dev->ready = 0; + dev->features = 0; + + /* Get a UNIX socket. */ + dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (dev->sock == -1) { + vubr_die("socket"); + } + + un.sun_family = AF_UNIX; + strcpy(un.sun_path, path); + len = sizeof(un.sun_family) + strlen(path); + unlink(path); + + if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) { + vubr_die("bind"); + } + + if (listen(dev->sock, 1) == -1) { + vubr_die("listen"); + } + + dispatcher_init(&dev->dispatcher); + dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, + vubr_accept_cb); + + DPRINT("Waiting for connections on UNIX socket %s ...\n", path); + return dev; +} + +static void +vubr_set_host(struct sockaddr_in *saddr, const char *host) +{ + if (isdigit(host[0])) { + if (!inet_aton(host, &saddr->sin_addr)) { + fprintf(stderr, "inet_aton() failed.\n"); + exit(1); + } + } else { + struct hostent *he = gethostbyname(host); + + if (!he) { + fprintf(stderr, "gethostbyname() failed.\n"); + exit(1); + } + saddr->sin_addr = *(struct in_addr *)he->h_addr; + } +} + +static void +vubr_backend_udp_setup(VubrDev *dev, + const char *local_host, + const char *local_port, + const char *remote_host, + const char *remote_port) +{ + int sock; + const char *r; + + int lport, rport; + + lport = strtol(local_port, (char **)&r, 0); + if (r == local_port) { + fprintf(stderr, "lport parsing failed.\n"); + exit(1); + } + + rport = strtol(remote_port, (char **)&r, 0); + if (r == remote_port) { + fprintf(stderr, "rport parsing failed.\n"); + exit(1); + } + + struct sockaddr_in si_local = { + .sin_family = AF_INET, + .sin_port = htons(lport), + }; + + vubr_set_host(&si_local, local_host); + + /* setup destination for sends */ + dev->backend_udp_dest = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_port = htons(rport), + }; + vubr_set_host(&dev->backend_udp_dest, remote_host); + + sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (sock == -1) { + vubr_die("socket"); + } + + if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) { + vubr_die("bind"); + } + + dev->backend_udp_sock = sock; + dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb); + DPRINT("Waiting for data from udp backend on %s:%d...\n", + local_host, lport); +} + +static void +vubr_run(VubrDev *dev) +{ + while (1) { + /* timeout 200ms */ + dispatcher_wait(&dev->dispatcher, 200000); + /* Here one can try polling strategy. */ + } +} + +static int +vubr_parse_host_port(const char **host, const char **port, const char *buf) +{ + char *p = strchr(buf, ':'); + + if (!p) { + return -1; + } + *p = '\0'; + *host = strdup(buf); + *port = strdup(p + 1); + return 0; +} + +#define DEFAULT_UD_SOCKET "/tmp/vubr.sock" +#define DEFAULT_LHOST "127.0.0.1" +#define DEFAULT_LPORT "4444" +#define DEFAULT_RHOST "127.0.0.1" +#define DEFAULT_RPORT "5555" + +static const char *ud_socket_path = DEFAULT_UD_SOCKET; +static const char *lhost = DEFAULT_LHOST; +static const char *lport = DEFAULT_LPORT; +static const char *rhost = DEFAULT_RHOST; +static const char *rport = DEFAULT_RPORT; + +int +main(int argc, char *argv[]) +{ + VubrDev *dev; + int opt; + + while ((opt = getopt(argc, argv, "l:r:u:")) != -1) { + + switch (opt) { + case 'l': + if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) { + goto out; + } + break; + case 'r': + if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) { + goto out; + } + break; + case 'u': + ud_socket_path = strdup(optarg); + break; + default: + goto out; + } + } + + DPRINT("ud socket: %s\n", ud_socket_path); + DPRINT("local: %s:%s\n", lhost, lport); + DPRINT("remote: %s:%s\n", rhost, rport); + + dev = vubr_new(ud_socket_path); + if (!dev) { + return 1; + } + + vubr_backend_udp_setup(dev, lhost, lport, rhost, rport); + vubr_run(dev); + return 0; + +out: + fprintf(stderr, "Usage: %s ", argv[0]); + fprintf(stderr, "[-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n"); + fprintf(stderr, "\t-u path to unix doman socket. default: %s\n", + DEFAULT_UD_SOCKET); + fprintf(stderr, "\t-l local host and port. default: %s:%s\n", + DEFAULT_LHOST, DEFAULT_LPORT); + fprintf(stderr, "\t-r remote host and port. default: %s:%s\n", + DEFAULT_RHOST, DEFAULT_RPORT); + + return 1; +} diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index a74c934cc0..991fd85c7c 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -57,7 +57,7 @@ typedef enum VhostUserRequest { VHOST_USER_GET_FEATURES = 1, VHOST_USER_SET_FEATURES = 2, VHOST_USER_SET_OWNER = 3, - VHOST_USER_RESET_DEVICE = 4, + VHOST_USER_RESET_OWNER = 4, VHOST_USER_SET_MEM_TABLE = 5, VHOST_USER_SET_LOG_BASE = 6, VHOST_USER_SET_LOG_FD = 7, @@ -70,6 +70,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ERR = 14, VHOST_USER_GET_PROTOCOL_FEATURES = 15, VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_MAX } VhostUserRequest; @@ -86,6 +87,11 @@ typedef struct VhostUserMemory { VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + typedef struct VhostUserMsg { VhostUserRequest request; @@ -94,11 +100,14 @@ typedef struct VhostUserMsg { uint32_t flags; uint32_t size; /* the following payload size */ union { +#define VHOST_USER_VRING_IDX_MASK (0xff) +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) uint64_t u64; struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; - }; + VhostUserLog log; + } payload; } QEMU_PACKED VhostUserMsg; static VhostUserMsg m __attribute__ ((unused)); @@ -114,6 +123,7 @@ static VhostUserMsg m __attribute__ ((unused)); typedef struct TestServer { gchar *socket_path; + gchar *mig_path; gchar *chr_name; CharDriverState *chr; int fds_num; @@ -122,6 +132,7 @@ typedef struct TestServer { GMutex data_mutex; GCond data_cond; int log_fd; + uint64_t rings; } TestServer; #if !GLIB_CHECK_VERSION(2, 32, 0) @@ -162,8 +173,9 @@ static void wait_for_fds(TestServer *s) g_mutex_unlock(&s->data_mutex); } -static void read_guest_mem(TestServer *s) +static void read_guest_mem(const void *data) { + TestServer *s = (void *)data; uint32_t *guest_mem; int i, j; size_t size; @@ -206,8 +218,7 @@ static void read_guest_mem(TestServer *s) static void *thread_function(void *data) { - GMainLoop *loop; - loop = g_main_loop_new(NULL, FALSE); + GMainLoop *loop = data; g_main_loop_run(loop); return NULL; } @@ -242,23 +253,23 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) case VHOST_USER_GET_FEATURES: /* send back features to qemu */ msg.flags |= VHOST_USER_REPLY_MASK; - msg.size = sizeof(m.u64); - msg.u64 = 0x1ULL << VHOST_F_LOG_ALL | + msg.size = sizeof(m.payload.u64); + msg.payload.u64 = 0x1ULL << VHOST_F_LOG_ALL | 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES; p = (uint8_t *) &msg; qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); break; case VHOST_USER_SET_FEATURES: - g_assert_cmpint(msg.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES), + g_assert_cmpint(msg.payload.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES), !=, 0ULL); break; case VHOST_USER_GET_PROTOCOL_FEATURES: /* send back features to qemu */ msg.flags |= VHOST_USER_REPLY_MASK; - msg.size = sizeof(m.u64); - msg.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD; + msg.size = sizeof(m.payload.u64); + msg.payload.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD; p = (uint8_t *) &msg; qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); break; @@ -266,15 +277,18 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) case VHOST_USER_GET_VRING_BASE: /* send back vring base to qemu */ msg.flags |= VHOST_USER_REPLY_MASK; - msg.size = sizeof(m.state); - msg.state.num = 0; + msg.size = sizeof(m.payload.state); + msg.payload.state.num = 0; p = (uint8_t *) &msg; qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + + assert(msg.payload.state.index < 2); + s->rings &= ~(0x1ULL << msg.payload.state.index); break; case VHOST_USER_SET_MEM_TABLE: /* received the mem table */ - memcpy(&s->memory, &msg.memory, sizeof(msg.memory)); + memcpy(&s->memory, &msg.payload.memory, sizeof(msg.payload.memory)); s->fds_num = qemu_chr_fe_get_msgfds(chr, s->fds, G_N_ELEMENTS(s->fds)); /* signal the test that it can continue */ @@ -307,8 +321,9 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) g_cond_signal(&s->data_cond); break; - case VHOST_USER_RESET_DEVICE: - s->fds_num = 0; + case VHOST_USER_SET_VRING_BASE: + assert(msg.payload.state.index < 2); + s->rings |= 0x1ULL << msg.payload.state.index; break; default: @@ -351,6 +366,7 @@ static TestServer *test_server_new(const gchar *name) gchar *chr_path; server->socket_path = g_strdup_printf("%s/%s.sock", tmpfs, name); + server->mig_path = g_strdup_printf("%s/%s.mig", tmpfs, name); chr_path = g_strdup_printf("unix:%s,server,nowait", server->socket_path); server->chr_name = g_strdup_printf("chr-%s", name); @@ -375,7 +391,7 @@ static TestServer *test_server_new(const gchar *name) g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name, \ (s)->socket_path, (s)->chr_name, ##__VA_ARGS__) -static void test_server_free(TestServer *server) +static gboolean _test_server_free(TestServer *server) { int i; @@ -392,9 +408,18 @@ static void test_server_free(TestServer *server) unlink(server->socket_path); g_free(server->socket_path); + unlink(server->mig_path); + g_free(server->mig_path); g_free(server->chr_name); g_free(server); + + return FALSE; +} + +static void test_server_free(TestServer *server) +{ + g_idle_add((GSourceFunc)_test_server_free, server); } static void wait_for_log_fd(TestServer *s) @@ -475,25 +500,36 @@ static gboolean test_migrate_source_check(GSource *source) { TestMigrateSource *t = (TestMigrateSource *)source; - gboolean overlap = t->src->fds_num > 0 && t->dest->fds_num > 0; + gboolean overlap = t->src->rings && t->dest->rings; g_assert(!overlap); return FALSE; } +#if !GLIB_CHECK_VERSION(2,36,0) +/* this callback is unnecessary with glib >2.36, the default + * prepare for the source does the same */ +static gboolean +test_migrate_source_prepare(GSource *source, gint *timeout) +{ + *timeout = -1; + return FALSE; +} +#endif + GSourceFuncs test_migrate_source_funcs = { - NULL, - test_migrate_source_check, - NULL, - NULL +#if !GLIB_CHECK_VERSION(2,36,0) + .prepare = test_migrate_source_prepare, +#endif + .check = test_migrate_source_check, }; static void test_migrate(void) { TestServer *s = test_server_new("src"); TestServer *dest = test_server_new("dest"); - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("%s%s", "unix:", dest->mig_path); QTestState *global = global_qtest, *from, *to; GSource *source; gchar *cmd; @@ -564,6 +600,7 @@ static void test_migrate(void) test_server_free(dest); qtest_quit(from); test_server_free(s); + g_free(uri); global_qtest = global; } @@ -576,6 +613,8 @@ int main(int argc, char **argv) char *qemu_cmd = NULL; int ret; char template[] = "/tmp/vhost-test-XXXXXX"; + GMainLoop *loop; + GThread *thread; g_test_init(&argc, &argv, NULL); @@ -598,8 +637,9 @@ int main(int argc, char **argv) server = test_server_new("test"); + loop = g_main_loop_new(NULL, FALSE); /* run the main loop thread so the chardev may operate */ - g_thread_new(NULL, thread_function, NULL); + thread = g_thread_new(NULL, thread_function, loop); qemu_cmd = GET_QEMU_CMD(server); @@ -618,6 +658,14 @@ int main(int argc, char **argv) /* cleanup */ test_server_free(server); + /* finish the helper thread and dispatch pending sources */ + g_main_loop_quit(loop); + g_thread_join(thread); + while (g_main_context_pending(NULL)) { + g_main_context_iteration (NULL, TRUE); + } + g_main_loop_unref(loop); + ret = rmdir(tmpfs); if (ret != 0) { g_test_message("unable to rmdir: path (%s): %s\n", @@ -260,9 +260,9 @@ static TPMInfo *qmp_query_tpm_inst(TPMBackend *drv) switch (drv->ops->type) { case TPM_TYPE_PASSTHROUGH: - res->options->kind = TPM_TYPE_OPTIONS_KIND_PASSTHROUGH; + res->options->type = TPM_TYPE_OPTIONS_KIND_PASSTHROUGH; tpo = g_new0(TPMPassthroughOptions, 1); - res->options->passthrough = tpo; + res->options->u.passthrough = tpo; if (drv->path) { tpo->path = g_strdup(drv->path); tpo->has_path = true; diff --git a/trace-events b/trace-events index bdfe79f359..2fce98e762 100644 --- a/trace-events +++ b/trace-events @@ -69,7 +69,7 @@ bdrv_aio_write_zeroes(void *bs, int64_t sector_num, int nb_sectors, int flags, v bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" -bdrv_co_no_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" +bdrv_co_readv_no_serialising(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector, int flags) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x" bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p" @@ -139,8 +139,8 @@ paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId6 paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d" # ioport.c -cpu_in(unsigned int addr, unsigned int val) "addr %#x value %u" -cpu_out(unsigned int addr, unsigned int val) "addr %#x value %u" +cpu_in(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u" +cpu_out(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u" # balloon.c # Since requests are raised via monitor, not many tracepoints are needed. @@ -1031,9 +1031,9 @@ esp_pci_sbac_write(uint32_t reg, uint32_t val) "sbac: 0x%8.8x -> 0x%8.8x" # monitor.c handle_qmp_command(void *mon, const char *cmd_name) "mon %p cmd_name \"%s\"" monitor_protocol_emitter(void *mon) "mon %p" -monitor_protocol_event_handler(uint32_t event, void *data, uint64_t last, uint64_t now) "event=%d data=%p last=%" PRId64 " now=%" PRId64 +monitor_protocol_event_handler(uint32_t event, void *qdict) "event=%d data=%p" monitor_protocol_event_emit(uint32_t event, void *data) "event=%d data=%p" -monitor_protocol_event_queue(uint32_t event, void *data, uint64_t rate, uint64_t last, uint64_t now) "event=%d data=%p rate=%" PRId64 " last=%" PRId64 " now=%" PRId64 +monitor_protocol_event_queue(uint32_t event, void *qdict, uint64_t rate) "event=%d data=%p rate=%" PRId64 monitor_protocol_event_throttle(uint32_t event, uint64_t rate) "event=%d rate=%" PRId64 # hw/net/opencores_eth.c @@ -1202,16 +1202,43 @@ virtio_gpu_fence_resp(uint64_t fence) "fence 0x%" PRIx64 # migration/savevm.c qemu_loadvm_state_section(unsigned int section_type) "%d" +qemu_loadvm_state_section_command(int ret) "%d" qemu_loadvm_state_section_partend(uint32_t section_id) "%u" +qemu_loadvm_state_main(void) "" +qemu_loadvm_state_main_quit_parent(void) "" +qemu_loadvm_state_post_main(int ret) "%d" qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" +qemu_savevm_send_packaged(void) "" +loadvm_handle_cmd_packaged(unsigned int length) "%u" +loadvm_handle_cmd_packaged_main(int ret) "%d" +loadvm_handle_cmd_packaged_received(int ret) "%d" +loadvm_postcopy_handle_advise(void) "" +loadvm_postcopy_handle_listen(void) "" +loadvm_postcopy_handle_run(void) "" +loadvm_postcopy_handle_run_cpu_sync(void) "" +loadvm_postcopy_handle_run_vmstart(void) "" +loadvm_postcopy_ram_handle_discard(void) "" +loadvm_postcopy_ram_handle_discard_end(void) "" +loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud" +loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d" +loadvm_process_command_ping(uint32_t val) "%x" +postcopy_ram_listen_thread_exit(void) "" +postcopy_ram_listen_thread_start(void) "" +qemu_savevm_send_postcopy_advise(void) "" +qemu_savevm_send_postcopy_ram_discard(const char *id, uint16_t len) "%s: %ud" +savevm_command_send(uint16_t command, uint16_t len) "com=0x%x len=%d" savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u" savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d" savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u" +savevm_send_open_return_path(void) "" +savevm_send_ping(uint32_t val) "%x" +savevm_send_postcopy_listen(void) "" +savevm_send_postcopy_run(void) "" savevm_state_begin(void) "" savevm_state_header(void) "" savevm_state_iterate(void) "" -savevm_state_complete(void) "" -savevm_state_cancel(void) "" +savevm_state_cleanup(void) "" +savevm_state_complete_precopy(void) "" vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s" vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s" qemu_announce_self_iter(const char *mac) "%s" @@ -1229,9 +1256,14 @@ vmstate_subsection_load_good(const char *parent) "%s" qemu_file_fclose(void) "" # migration/ram.c +get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64 +get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)" migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64"" migration_throttle(void) "" +ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" +ram_postcopy_send_discard_bitmap(void) "" +ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx" # hw/display/qxl.c disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d" @@ -1387,6 +1419,12 @@ ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "ad prep_io_800_writeb(uint32_t addr, uint32_t val) "0x%08" PRIx32 " => 0x%02" PRIx32 prep_io_800_readb(uint32_t addr, uint32_t retval) "0x%08" PRIx32 " <= 0x%02" PRIx32 +# io/buffer.c +buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd" +buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s" +buffer_move(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s" +buffer_free(const char *buf, size_t len) "%s: capacity %zd" + # util/hbitmap.c hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx" hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 @@ -1421,17 +1459,40 @@ flic_no_device_api(int err) "flic: no Device Contral API support %d" flic_reset_failed(int err) "flic: reset failed %d" # migration.c +await_return_path_close_on_source_close(void) "" +await_return_path_close_on_source_joining(void) "" migrate_set_state(int new_state) "new state %d" migrate_fd_cleanup(void) "" migrate_fd_error(void) "" migrate_fd_cancel(void) "" -migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64 -migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64 -migrate_state_too_big(void) "" +migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at %zx len %zx" +migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")" +migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d" +migration_completion_file_err(void) "" +migration_completion_postcopy_end(void) "" +migration_completion_postcopy_end_after_complete(void) "" +migration_completion_postcopy_end_before_rp(void) "" +migration_completion_postcopy_end_after_rp(int rp_error) "%d" +migration_thread_after_loop(void) "" +migration_thread_file_err(void) "" +migration_thread_setup_complete(void) "" +open_return_path_on_source(void) "" +open_return_path_on_source_continue(void) "" +postcopy_start(void) "" +postcopy_start_set_run(void) "" +source_return_path_thread_bad_end(void) "" +source_return_path_thread_end(void) "" +source_return_path_thread_entry(void) "" +source_return_path_thread_loop_top(void) "" +source_return_path_thread_pong(uint32_t val) "%x" +source_return_path_thread_shut(uint32_t val) "%x" migrate_global_state_post_load(const char *state) "loaded state: %s" migrate_global_state_pre_save(const char *state) "saved state: %s" -migration_completion_file_err(void) "" migration_thread_low_pending(uint64_t pending) "%" PRIu64 +migrate_state_too_big(void) "" +migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64 +process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" +process_incoming_migration_co_postcopy_end_main(void) "" # migration/rdma.c qemu_rdma_accept_incoming_migration(void) "" @@ -1497,6 +1558,25 @@ rdma_start_incoming_migration_after_rdma_listen(void) "" rdma_start_outgoing_migration_after_rdma_connect(void) "" rdma_start_outgoing_migration_after_rdma_source_init(void) "" +# migration/postcopy-ram.c +postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" +postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx" +postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx" +postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" +postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" +postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" +postcopy_place_page(void *host_addr) "host=%p" +postcopy_place_page_zero(void *host_addr) "host=%p" +postcopy_ram_enable_notify(void) "" +postcopy_ram_fault_thread_entry(void) "" +postcopy_ram_fault_thread_exit(void) "" +postcopy_ram_fault_thread_quit(void) "" +postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=%" PRIx64 " rb=%s offset=%zx" +postcopy_ram_incoming_cleanup_closeuf(void) "" +postcopy_ram_incoming_cleanup_entry(void) "" +postcopy_ram_incoming_cleanup_exit(void) "" +postcopy_ram_incoming_cleanup_join(void) "" + # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p" diff --git a/trace/simple.c b/trace/simple.c index 11ad030937..56a624cac8 100644 --- a/trace/simple.c +++ b/trace/simple.c @@ -329,7 +329,8 @@ bool st_set_trace_file(const char *file) g_free(trace_file_name); if (!file) { - trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, getpid()); + /* Type cast needed for Windows where getpid() returns an int. */ + trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, (pid_t)getpid()); } else { trace_file_name = g_strdup_printf("%s", file); } diff --git a/translate-all.c b/translate-all.c index 20ce40ec28..042a8576ac 100644 --- a/translate-all.c +++ b/translate-all.c @@ -118,7 +118,7 @@ typedef struct PageDesc { #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS) uintptr_t qemu_host_page_size; -uintptr_t qemu_host_page_mask; +intptr_t qemu_host_page_mask; /* The bottom level has pointers to PageDesc */ static void *l1_map[V_L1_SIZE]; @@ -326,14 +326,14 @@ void page_size_init(void) /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */ qemu_real_host_page_size = getpagesize(); - qemu_real_host_page_mask = ~(qemu_real_host_page_size - 1); + qemu_real_host_page_mask = -(intptr_t)qemu_real_host_page_size; if (qemu_host_page_size == 0) { qemu_host_page_size = qemu_real_host_page_size; } if (qemu_host_page_size < TARGET_PAGE_SIZE) { qemu_host_page_size = TARGET_PAGE_SIZE; } - qemu_host_page_mask = ~(qemu_host_page_size - 1); + qemu_host_page_mask = -(intptr_t)qemu_host_page_size; } static void page_init(void) @@ -1069,7 +1069,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #endif phys_pc = get_page_addr_code(env, pc); - if (use_icount) { + if (use_icount && !(cflags & CF_IGNORE_ICOUNT)) { cflags |= CF_USE_ICOUNT; } diff --git a/translate-common.c b/translate-common.c index 619feb466e..171222d037 100644 --- a/translate-common.c +++ b/translate-common.c @@ -21,7 +21,7 @@ #include "qom/cpu.h" uintptr_t qemu_real_host_page_size; -uintptr_t qemu_real_host_page_mask; +intptr_t qemu_real_host_page_mask; #ifndef CONFIG_USER_ONLY /* mask must never be zero, except for A20 change call */ diff --git a/ui/cocoa.m b/ui/cocoa.m index c0d6bb2f70..d76b942732 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -724,7 +724,15 @@ QemuCocoaView *cocoaView; } if (mouse_event) { - if (last_buttons != buttons) { + /* Don't send button events to the guest unless we've got a + * mouse grab or window focus. If we have neither then this event + * is the user clicking on the background window to activate and + * bring us to the front, which will be done by the sendEvent + * call below. We definitely don't want to pass that click through + * to the guest. + */ + if ((isMouseGrabbed || [[self window] isKeyWindow]) && + (last_buttons != buttons)) { static uint32_t bmap[INPUT_BUTTON_MAX] = { [INPUT_BUTTON_LEFT] = MOUSE_EVENT_LBUTTON, [INPUT_BUTTON_MIDDLE] = MOUSE_EVENT_MBUTTON, @@ -1113,10 +1121,13 @@ QemuCocoaView *cocoaView; } Error *err = NULL; - qmp_change_blockdev([drive cStringUsingEncoding: NSASCIIStringEncoding], - [file cStringUsingEncoding: NSASCIIStringEncoding], - "raw", - &err); + qmp_blockdev_change_medium([drive cStringUsingEncoding: + NSASCIIStringEncoding], + [file cStringUsingEncoding: + NSASCIIStringEncoding], + true, "raw", + false, 0, + &err); handleAnyDeviceErrors(err); } } diff --git a/ui/console.c b/ui/console.c index cf649b2612..745c354f53 100644 --- a/ui/console.c +++ b/ui/console.c @@ -450,7 +450,7 @@ static void text_console_resize(QemuConsole *s) if (s->width < w1) w1 = s->width; - cells = g_malloc(s->width * s->total_height * sizeof(TextCell)); + cells = g_new(TextCell, s->width * s->total_height); for(y = 0; y < s->total_height; y++) { c = &cells[y * s->width]; if (w1 > 0) { @@ -2016,7 +2016,7 @@ static VcHandler *vc_handler = text_console_init; static CharDriverState *vc_init(const char *id, ChardevBackend *backend, ChardevReturn *ret, Error **errp) { - return vc_handler(backend->vc, errp); + return vc_handler(backend->u.vc, errp); } void register_vc_handler(VcHandler *handler) @@ -2057,30 +2057,30 @@ static void qemu_chr_parse_vc(QemuOpts *opts, ChardevBackend *backend, { int val; - backend->vc = g_new0(ChardevVC, 1); + backend->u.vc = g_new0(ChardevVC, 1); val = qemu_opt_get_number(opts, "width", 0); if (val != 0) { - backend->vc->has_width = true; - backend->vc->width = val; + backend->u.vc->has_width = true; + backend->u.vc->width = val; } val = qemu_opt_get_number(opts, "height", 0); if (val != 0) { - backend->vc->has_height = true; - backend->vc->height = val; + backend->u.vc->has_height = true; + backend->u.vc->height = val; } val = qemu_opt_get_number(opts, "cols", 0); if (val != 0) { - backend->vc->has_cols = true; - backend->vc->cols = val; + backend->u.vc->has_cols = true; + backend->u.vc->cols = val; } val = qemu_opt_get_number(opts, "rows", 0); if (val != 0) { - backend->vc->has_rows = true; - backend->vc->rows = val; + backend->u.vc->has_rows = true; + backend->u.vc->rows = val; } } diff --git a/ui/curses.c b/ui/curses.c index 8edb038bb3..7e7e4029ec 100644 --- a/ui/curses.c +++ b/ui/curses.c @@ -42,6 +42,8 @@ static WINDOW *screenpad = NULL; static int width, height, gwidth, gheight, invalidate; static int px, py, sminx, sminy, smaxx, smaxy; +chtype vga_to_curses[256]; + static void curses_update(DisplayChangeListener *dcl, int x, int y, int w, int h) { @@ -341,8 +343,55 @@ static void curses_setup(void) nodelay(stdscr, TRUE); nonl(); keypad(stdscr, TRUE); start_color(); raw(); scrollok(stdscr, FALSE); - for (i = 0; i < 64; i ++) + for (i = 0; i < 64; i++) { init_pair(i, colour_default[i & 7], colour_default[i >> 3]); + } + /* Set default color for more than 64. (monitor uses 0x74xx for example) */ + for (i = 64; i < COLOR_PAIRS; i++) { + init_pair(i, COLOR_WHITE, COLOR_BLACK); + } + + /* + * Setup mapping for vga to curses line graphics. + * FIXME: for better font, have to use ncursesw and setlocale() + */ +#if 0 + /* FIXME: map from where? */ + ACS_S1; + ACS_S3; + ACS_S7; + ACS_S9; +#endif + /* ACS_* is not constant. So, we can't initialize statically. */ + vga_to_curses['\0'] = ' '; + vga_to_curses[0x04] = ACS_DIAMOND; + vga_to_curses[0x0a] = ACS_RARROW; + vga_to_curses[0x0b] = ACS_LARROW; + vga_to_curses[0x18] = ACS_UARROW; + vga_to_curses[0x19] = ACS_DARROW; + vga_to_curses[0x9c] = ACS_STERLING; + vga_to_curses[0xb0] = ACS_BOARD; + vga_to_curses[0xb1] = ACS_CKBOARD; + vga_to_curses[0xb3] = ACS_VLINE; + vga_to_curses[0xb4] = ACS_RTEE; + vga_to_curses[0xbf] = ACS_URCORNER; + vga_to_curses[0xc0] = ACS_LLCORNER; + vga_to_curses[0xc1] = ACS_BTEE; + vga_to_curses[0xc2] = ACS_TTEE; + vga_to_curses[0xc3] = ACS_LTEE; + vga_to_curses[0xc4] = ACS_HLINE; + vga_to_curses[0xc5] = ACS_PLUS; + vga_to_curses[0xce] = ACS_LANTERN; + vga_to_curses[0xd8] = ACS_NEQUAL; + vga_to_curses[0xd9] = ACS_LRCORNER; + vga_to_curses[0xda] = ACS_ULCORNER; + vga_to_curses[0xdb] = ACS_BLOCK; + vga_to_curses[0xe3] = ACS_PI; + vga_to_curses[0xf1] = ACS_PLMINUS; + vga_to_curses[0xf2] = ACS_GEQUAL; + vga_to_curses[0xf3] = ACS_LEQUAL; + vga_to_curses[0xf8] = ACS_DEGREE; + vga_to_curses[0xfe] = ACS_BULLET; } static void curses_keyboard_setup(void) @@ -382,7 +431,7 @@ void curses_display_init(DisplayState *ds, int full_screen) curses_winch_init(); - dcl = (DisplayChangeListener *) g_malloc0(sizeof(DisplayChangeListener)); + dcl = g_new0(DisplayChangeListener, 1); dcl->ops = &dcl_ops; register_displaychangelistener(dcl); diff --git a/ui/curses_keys.h b/ui/curses_keys.h index 18ce6dceee..f7467449b9 100644 --- a/ui/curses_keys.h +++ b/ui/curses_keys.h @@ -29,8 +29,7 @@ #include "keymaps.h" -#define KEY_RELEASE 0x80 -#define KEY_MASK 0x7f +#define KEY_MASK SCANCODE_KEYMASK #define GREY_CODE 0xe0 #define GREY SCANCODE_GREY #define SHIFT_CODE 0x2a @@ -60,6 +59,8 @@ static const int curses2keysym[CURSES_KEYS] = { ['\n'] = KEY_ENTER, [27] = 27, [KEY_BTAB] = '\t' | KEYSYM_SHIFT, + [KEY_SPREVIOUS] = KEY_PPAGE | KEYSYM_SHIFT, + [KEY_SNEXT] = KEY_NPAGE | KEYSYM_SHIFT, }; static const int curses2keycode[CURSES_KEYS] = { @@ -149,6 +150,9 @@ static const int curses2keycode[CURSES_KEYS] = { [KEY_IC] = 82 | GREY, /* Insert */ [KEY_DC] = 83 | GREY, /* Delete */ + [KEY_SPREVIOUS] = 73 | GREY | SHIFT, /* Shift + Page Up */ + [KEY_SNEXT] = 81 | GREY | SHIFT, /* Shift + Page Down */ + ['!'] = 2 | SHIFT, ['@'] = 3 | SHIFT, ['#'] = 4 | SHIFT, diff --git a/ui/input-keymap.c b/ui/input-keymap.c index 7635cb0dc4..d36be4b60d 100644 --- a/ui/input-keymap.c +++ b/ui/input-keymap.c @@ -139,11 +139,11 @@ static int number_to_qcode[0x100]; int qemu_input_key_value_to_number(const KeyValue *value) { - if (value->kind == KEY_VALUE_KIND_QCODE) { - return qcode_to_number[value->qcode]; + if (value->type == KEY_VALUE_KIND_QCODE) { + return qcode_to_number[value->u.qcode]; } else { - assert(value->kind == KEY_VALUE_KIND_NUMBER); - return value->number; + assert(value->type == KEY_VALUE_KIND_NUMBER); + return value->u.number; } } @@ -166,11 +166,11 @@ int qemu_input_key_number_to_qcode(uint8_t nr) int qemu_input_key_value_to_qcode(const KeyValue *value) { - if (value->kind == KEY_VALUE_KIND_QCODE) { - return value->qcode; + if (value->type == KEY_VALUE_KIND_QCODE) { + return value->u.qcode; } else { - assert(value->kind == KEY_VALUE_KIND_NUMBER); - return qemu_input_key_number_to_qcode(value->number); + assert(value->type == KEY_VALUE_KIND_NUMBER); + return qemu_input_key_number_to_qcode(value->u.number); } } @@ -180,8 +180,8 @@ int qemu_input_key_value_to_scancode(const KeyValue *value, bool down, int keycode = qemu_input_key_value_to_number(value); int count = 0; - if (value->kind == KEY_VALUE_KIND_QCODE && - value->qcode == Q_KEY_CODE_PAUSE) { + if (value->type == KEY_VALUE_KIND_QCODE && + value->u.qcode == Q_KEY_CODE_PAUSE) { /* specific case */ int v = down ? 0 : 0x80; codes[count++] = 0xe1; diff --git a/ui/input-legacy.c b/ui/input-legacy.c index e50f2968e1..e0a39f08a5 100644 --- a/ui/input-legacy.c +++ b/ui/input-legacy.c @@ -113,8 +113,8 @@ static void legacy_kbd_event(DeviceState *dev, QemuConsole *src, if (!entry || !entry->put_kbd) { return; } - count = qemu_input_key_value_to_scancode(evt->key->key, - evt->key->down, + count = qemu_input_key_value_to_scancode(evt->u.key->key, + evt->u.key->down, scancodes); for (i = 0; i < count; i++) { entry->put_kbd(entry->opaque, scancodes[i]); @@ -150,21 +150,22 @@ static void legacy_mouse_event(DeviceState *dev, QemuConsole *src, }; QEMUPutMouseEntry *s = (QEMUPutMouseEntry *)dev; - switch (evt->kind) { + switch (evt->type) { case INPUT_EVENT_KIND_BTN: - if (evt->btn->down) { - s->buttons |= bmap[evt->btn->button]; + if (evt->u.btn->down) { + s->buttons |= bmap[evt->u.btn->button]; } else { - s->buttons &= ~bmap[evt->btn->button]; + s->buttons &= ~bmap[evt->u.btn->button]; } - if (evt->btn->down && evt->btn->button == INPUT_BUTTON_WHEEL_UP) { + if (evt->u.btn->down && evt->u.btn->button == INPUT_BUTTON_WHEEL_UP) { s->qemu_put_mouse_event(s->qemu_put_mouse_event_opaque, s->axis[INPUT_AXIS_X], s->axis[INPUT_AXIS_Y], -1, s->buttons); } - if (evt->btn->down && evt->btn->button == INPUT_BUTTON_WHEEL_DOWN) { + if (evt->u.btn->down && + evt->u.btn->button == INPUT_BUTTON_WHEEL_DOWN) { s->qemu_put_mouse_event(s->qemu_put_mouse_event_opaque, s->axis[INPUT_AXIS_X], s->axis[INPUT_AXIS_Y], @@ -173,10 +174,10 @@ static void legacy_mouse_event(DeviceState *dev, QemuConsole *src, } break; case INPUT_EVENT_KIND_ABS: - s->axis[evt->abs->axis] = evt->abs->value; + s->axis[evt->u.abs->axis] = evt->u.abs->value; break; case INPUT_EVENT_KIND_REL: - s->axis[evt->rel->axis] += evt->rel->value; + s->axis[evt->u.rel->axis] += evt->u.rel->value; break; default: break; @@ -205,7 +206,7 @@ QEMUPutMouseEntry *qemu_add_mouse_event_handler(QEMUPutMouseEvent *func, { QEMUPutMouseEntry *s; - s = g_malloc0(sizeof(QEMUPutMouseEntry)); + s = g_new0(QEMUPutMouseEntry, 1); s->qemu_put_mouse_event = func; s->qemu_put_mouse_event_opaque = opaque; @@ -239,7 +240,7 @@ QEMUPutLEDEntry *qemu_add_led_event_handler(QEMUPutLEDEvent *func, { QEMUPutLEDEntry *s; - s = g_malloc0(sizeof(QEMUPutLEDEntry)); + s = g_new0(QEMUPutLEDEntry, 1); s->put_led = func; s->opaque = opaque; diff --git a/ui/input.c b/ui/input.c index 1a552d1de1..a0f9873f59 100644 --- a/ui/input.c +++ b/ui/input.c @@ -6,6 +6,7 @@ #include "trace.h" #include "ui/input.h" #include "ui/console.h" +#include "sysemu/replay.h" struct QemuInputHandlerState { DeviceState *dev; @@ -147,10 +148,10 @@ void qmp_x_input_send_event(bool has_console, int64_t console, for (e = events; e != NULL; e = e->next) { InputEvent *event = e->value; - if (!qemu_input_find_handler(1 << event->kind, con)) { + if (!qemu_input_find_handler(1 << event->type, con)) { error_setg(errp, "Input handler not found for " "event type %s", - InputEventKind_lookup[event->kind]); + InputEventKind_lookup[event->type]); return; } } @@ -168,22 +169,22 @@ static void qemu_input_transform_abs_rotate(InputEvent *evt) { switch (graphic_rotate) { case 90: - if (evt->abs->axis == INPUT_AXIS_X) { - evt->abs->axis = INPUT_AXIS_Y; - } else if (evt->abs->axis == INPUT_AXIS_Y) { - evt->abs->axis = INPUT_AXIS_X; - evt->abs->value = INPUT_EVENT_ABS_SIZE - 1 - evt->abs->value; + if (evt->u.abs->axis == INPUT_AXIS_X) { + evt->u.abs->axis = INPUT_AXIS_Y; + } else if (evt->u.abs->axis == INPUT_AXIS_Y) { + evt->u.abs->axis = INPUT_AXIS_X; + evt->u.abs->value = INPUT_EVENT_ABS_SIZE - 1 - evt->u.abs->value; } break; case 180: - evt->abs->value = INPUT_EVENT_ABS_SIZE - 1 - evt->abs->value; + evt->u.abs->value = INPUT_EVENT_ABS_SIZE - 1 - evt->u.abs->value; break; case 270: - if (evt->abs->axis == INPUT_AXIS_X) { - evt->abs->axis = INPUT_AXIS_Y; - evt->abs->value = INPUT_EVENT_ABS_SIZE - 1 - evt->abs->value; - } else if (evt->abs->axis == INPUT_AXIS_Y) { - evt->abs->axis = INPUT_AXIS_X; + if (evt->u.abs->axis == INPUT_AXIS_X) { + evt->u.abs->axis = INPUT_AXIS_Y; + evt->u.abs->value = INPUT_EVENT_ABS_SIZE - 1 - evt->u.abs->value; + } else if (evt->u.abs->axis == INPUT_AXIS_Y) { + evt->u.abs->axis = INPUT_AXIS_X; } break; } @@ -197,18 +198,18 @@ static void qemu_input_event_trace(QemuConsole *src, InputEvent *evt) if (src) { idx = qemu_console_get_index(src); } - switch (evt->kind) { + switch (evt->type) { case INPUT_EVENT_KIND_KEY: - switch (evt->key->key->kind) { + switch (evt->u.key->key->type) { case KEY_VALUE_KIND_NUMBER: - qcode = qemu_input_key_number_to_qcode(evt->key->key->number); + qcode = qemu_input_key_number_to_qcode(evt->u.key->key->u.number); name = QKeyCode_lookup[qcode]; - trace_input_event_key_number(idx, evt->key->key->number, - name, evt->key->down); + trace_input_event_key_number(idx, evt->u.key->key->u.number, + name, evt->u.key->down); break; case KEY_VALUE_KIND_QCODE: - name = QKeyCode_lookup[evt->key->key->qcode]; - trace_input_event_key_qcode(idx, name, evt->key->down); + name = QKeyCode_lookup[evt->u.key->key->u.qcode]; + trace_input_event_key_qcode(idx, name, evt->u.key->down); break; case KEY_VALUE_KIND_MAX: /* keep gcc happy */ @@ -216,16 +217,16 @@ static void qemu_input_event_trace(QemuConsole *src, InputEvent *evt) } break; case INPUT_EVENT_KIND_BTN: - name = InputButton_lookup[evt->btn->button]; - trace_input_event_btn(idx, name, evt->btn->down); + name = InputButton_lookup[evt->u.btn->button]; + trace_input_event_btn(idx, name, evt->u.btn->down); break; case INPUT_EVENT_KIND_REL: - name = InputAxis_lookup[evt->rel->axis]; - trace_input_event_rel(idx, name, evt->rel->value); + name = InputAxis_lookup[evt->u.rel->axis]; + trace_input_event_rel(idx, name, evt->u.rel->value); break; case INPUT_EVENT_KIND_ABS: - name = InputAxis_lookup[evt->abs->axis]; - trace_input_event_abs(idx, name, evt->abs->value); + name = InputAxis_lookup[evt->u.abs->axis]; + trace_input_event_abs(idx, name, evt->u.abs->value); break; case INPUT_EVENT_KIND_MAX: /* keep gcc happy */ @@ -300,23 +301,19 @@ static void qemu_input_queue_sync(struct QemuInputEventQueueHead *queue) QTAILQ_INSERT_TAIL(queue, item, node); } -void qemu_input_event_send(QemuConsole *src, InputEvent *evt) +void qemu_input_event_send_impl(QemuConsole *src, InputEvent *evt) { QemuInputHandlerState *s; - if (!runstate_is_running() && !runstate_check(RUN_STATE_SUSPENDED)) { - return; - } - qemu_input_event_trace(src, evt); /* pre processing */ - if (graphic_rotate && (evt->kind == INPUT_EVENT_KIND_ABS)) { + if (graphic_rotate && (evt->type == INPUT_EVENT_KIND_ABS)) { qemu_input_transform_abs_rotate(evt); } /* send event */ - s = qemu_input_find_handler(1 << evt->kind, src); + s = qemu_input_find_handler(1 << evt->type, src); if (!s) { return; } @@ -324,14 +321,19 @@ void qemu_input_event_send(QemuConsole *src, InputEvent *evt) s->events++; } -void qemu_input_event_sync(void) +void qemu_input_event_send(QemuConsole *src, InputEvent *evt) { - QemuInputHandlerState *s; - if (!runstate_is_running() && !runstate_check(RUN_STATE_SUSPENDED)) { return; } + replay_input_event(src, evt); +} + +void qemu_input_event_sync_impl(void) +{ + QemuInputHandlerState *s; + trace_input_event_sync(); QTAILQ_FOREACH(s, &handlers, node) { @@ -345,13 +347,22 @@ void qemu_input_event_sync(void) } } +void qemu_input_event_sync(void) +{ + if (!runstate_is_running() && !runstate_check(RUN_STATE_SUSPENDED)) { + return; + } + + replay_input_sync_event(); +} + InputEvent *qemu_input_event_new_key(KeyValue *key, bool down) { InputEvent *evt = g_new0(InputEvent, 1); - evt->key = g_new0(InputKeyEvent, 1); - evt->kind = INPUT_EVENT_KIND_KEY; - evt->key->key = key; - evt->key->down = down; + evt->u.key = g_new0(InputKeyEvent, 1); + evt->type = INPUT_EVENT_KIND_KEY; + evt->u.key->key = key; + evt->u.key->down = down; return evt; } @@ -372,16 +383,16 @@ void qemu_input_event_send_key(QemuConsole *src, KeyValue *key, bool down) void qemu_input_event_send_key_number(QemuConsole *src, int num, bool down) { KeyValue *key = g_new0(KeyValue, 1); - key->kind = KEY_VALUE_KIND_NUMBER; - key->number = num; + key->type = KEY_VALUE_KIND_NUMBER; + key->u.number = num; qemu_input_event_send_key(src, key, down); } void qemu_input_event_send_key_qcode(QemuConsole *src, QKeyCode q, bool down) { KeyValue *key = g_new0(KeyValue, 1); - key->kind = KEY_VALUE_KIND_QCODE; - key->qcode = q; + key->type = KEY_VALUE_KIND_QCODE; + key->u.qcode = q; qemu_input_event_send_key(src, key, down); } @@ -398,10 +409,10 @@ void qemu_input_event_send_key_delay(uint32_t delay_ms) InputEvent *qemu_input_event_new_btn(InputButton btn, bool down) { InputEvent *evt = g_new0(InputEvent, 1); - evt->btn = g_new0(InputBtnEvent, 1); - evt->kind = INPUT_EVENT_KIND_BTN; - evt->btn->button = btn; - evt->btn->down = down; + evt->u.btn = g_new0(InputBtnEvent, 1); + evt->type = INPUT_EVENT_KIND_BTN; + evt->u.btn->button = btn; + evt->u.btn->down = down; return evt; } @@ -451,8 +462,8 @@ InputEvent *qemu_input_event_new_move(InputEventKind kind, InputEvent *evt = g_new0(InputEvent, 1); InputMoveEvent *move = g_new0(InputMoveEvent, 1); - evt->kind = kind; - evt->data = move; + evt->type = kind; + evt->u.data = move; move->axis = axis; move->value = value; return evt; diff --git a/ui/keymaps.c b/ui/keymaps.c index 49410ae9d1..1b9ba3fa24 100644 --- a/ui/keymaps.c +++ b/ui/keymaps.c @@ -109,7 +109,7 @@ static kbd_layout_t *parse_keyboard_layout(const name2keysym_t *table, } if (!k) { - k = g_malloc0(sizeof(kbd_layout_t)); + k = g_new0(kbd_layout_t, 1); } for(;;) { @@ -985,7 +985,7 @@ void sdl_display_init(DisplayState *ds, int full_screen, int no_frame) sdl_grab_start(); } - dcl = g_malloc0(sizeof(DisplayChangeListener)); + dcl = g_new0(DisplayChangeListener, 1); dcl->ops = &dcl_ops; register_displaychangelistener(dcl); diff --git a/ui/spice-core.c b/ui/spice-core.c index bf4fd07499..6a62d712fe 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -200,8 +200,6 @@ static void channel_event(int event, SpiceChannelEventInfo *info) { SpiceServerInfo *server = g_malloc0(sizeof(*server)); SpiceChannel *client = g_malloc0(sizeof(*client)); - server->base = g_malloc0(sizeof(*server->base)); - client->base = g_malloc0(sizeof(*client->base)); /* * Spice server might have called us from spice worker thread @@ -218,9 +216,11 @@ static void channel_event(int event, SpiceChannelEventInfo *info) } if (info->flags & SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT) { - add_addr_info(client->base, (struct sockaddr *)&info->paddr_ext, + add_addr_info(qapi_SpiceChannel_base(client), + (struct sockaddr *)&info->paddr_ext, info->plen_ext); - add_addr_info(server->base, (struct sockaddr *)&info->laddr_ext, + add_addr_info(qapi_SpiceServerInfo_base(server), + (struct sockaddr *)&info->laddr_ext, info->llen_ext); } else { error_report("spice: %s, extended address is expected", @@ -229,7 +229,9 @@ static void channel_event(int event, SpiceChannelEventInfo *info) switch (event) { case SPICE_CHANNEL_EVENT_CONNECTED: - qapi_event_send_spice_connected(server->base, client->base, &error_abort); + qapi_event_send_spice_connected(qapi_SpiceServerInfo_base(server), + qapi_SpiceChannel_base(client), + &error_abort); break; case SPICE_CHANNEL_EVENT_INITIALIZED: if (auth) { @@ -242,7 +244,9 @@ static void channel_event(int event, SpiceChannelEventInfo *info) break; case SPICE_CHANNEL_EVENT_DISCONNECTED: channel_list_del(info); - qapi_event_send_spice_disconnected(server->base, client->base, &error_abort); + qapi_event_send_spice_disconnected(qapi_SpiceServerInfo_base(server), + qapi_SpiceChannel_base(client), + &error_abort); break; default: break; @@ -378,16 +382,15 @@ static SpiceChannelList *qmp_query_spice_channels(void) chan = g_malloc0(sizeof(*chan)); chan->value = g_malloc0(sizeof(*chan->value)); - chan->value->base = g_malloc0(sizeof(*chan->value->base)); paddr = (struct sockaddr *)&item->info->paddr_ext; plen = item->info->plen_ext; getnameinfo(paddr, plen, host, sizeof(host), port, sizeof(port), NI_NUMERICHOST | NI_NUMERICSERV); - chan->value->base->host = g_strdup(host); - chan->value->base->port = g_strdup(port); - chan->value->base->family = inet_netfamily(paddr->sa_family); + chan->value->host = g_strdup(host); + chan->value->port = g_strdup(port); + chan->value->family = inet_netfamily(paddr->sa_family); chan->value->connection_id = item->info->connection_id; chan->value->channel_type = item->info->type; diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c index 22c9abce55..aa21191ea2 100644 --- a/ui/vnc-jobs.c +++ b/ui/vnc-jobs.c @@ -29,6 +29,7 @@ #include "vnc.h" #include "vnc-jobs.h" #include "qemu/sockets.h" +#include "qemu/main-loop.h" #include "block/aio.h" /* @@ -54,7 +55,6 @@ struct VncJobQueue { QemuCond cond; QemuMutex mutex; QemuThread thread; - Buffer buffer; bool exit; QTAILQ_HEAD(, VncJob) jobs; }; @@ -79,7 +79,7 @@ static void vnc_unlock_queue(VncJobQueue *queue) VncJob *vnc_job_new(VncState *vs) { - VncJob *job = g_malloc0(sizeof(VncJob)); + VncJob *job = g_new0(VncJob, 1); job->vs = vs; vnc_lock_queue(queue); @@ -90,7 +90,7 @@ VncJob *vnc_job_new(VncState *vs) int vnc_job_add_rect(VncJob *job, int x, int y, int w, int h) { - VncRectEntry *entry = g_malloc0(sizeof(VncRectEntry)); + VncRectEntry *entry = g_new0(VncRectEntry, 1); entry->rect.x = x; entry->rect.y = y; @@ -166,8 +166,11 @@ void vnc_jobs_consume_buffer(VncState *vs) vnc_lock_output(vs); if (vs->jobs_buffer.offset) { - vnc_write(vs, vs->jobs_buffer.buffer, vs->jobs_buffer.offset); - buffer_reset(&vs->jobs_buffer); + if (vs->csock != -1 && buffer_empty(&vs->output)) { + qemu_set_fd_handler(vs->csock, vnc_client_read, + vnc_client_write, vs); + } + buffer_move(&vs->output, &vs->jobs_buffer); } flush = vs->csock != -1 && vs->abort != true; vnc_unlock_output(vs); @@ -182,6 +185,9 @@ void vnc_jobs_consume_buffer(VncState *vs) */ static void vnc_async_encoding_start(VncState *orig, VncState *local) { + buffer_init(&local->output, "vnc-worker-output"); + local->csock = -1; /* Don't do any network work on this thread */ + local->vnc_encoding = orig->vnc_encoding; local->features = orig->features; local->vd = orig->vd; @@ -193,10 +199,6 @@ static void vnc_async_encoding_start(VncState *orig, VncState *local) local->zlib = orig->zlib; local->hextile = orig->hextile; local->zrle = orig->zrle; - local->output = queue->buffer; - local->csock = -1; /* Don't do any network work on this thread */ - - buffer_reset(&local->output); } static void vnc_async_encoding_end(VncState *orig, VncState *local) @@ -206,15 +208,13 @@ static void vnc_async_encoding_end(VncState *orig, VncState *local) orig->hextile = local->hextile; orig->zrle = local->zrle; orig->lossy_rect = local->lossy_rect; - - queue->buffer = local->output; } static int vnc_worker_thread_loop(VncJobQueue *queue) { VncJob *job; VncRectEntry *entry, *tmp; - VncState vs; + VncState vs = {}; int n_rectangles; int saved_offset; @@ -235,6 +235,14 @@ static int vnc_worker_thread_loop(VncJobQueue *queue) vnc_unlock_output(job->vs); goto disconnected; } + if (buffer_empty(&job->vs->output)) { + /* + * Looks like a NOP as it obviously moves no data. But it + * moves the empty buffer, so we don't have to malloc a new + * one for vs.output + */ + buffer_move_empty(&vs.output, &job->vs->output); + } vnc_unlock_output(job->vs); /* Make a local copy of vs and switch output buffers */ @@ -274,14 +282,13 @@ static int vnc_worker_thread_loop(VncJobQueue *queue) vnc_lock_output(job->vs); if (job->vs->csock != -1) { - buffer_reserve(&job->vs->jobs_buffer, vs.output.offset); - buffer_append(&job->vs->jobs_buffer, vs.output.buffer, - vs.output.offset); + buffer_move(&job->vs->jobs_buffer, &vs.output); /* Copy persistent encoding data */ vnc_async_encoding_end(job->vs, &vs); qemu_bh_schedule(job->vs->bh); } else { + buffer_reset(&vs.output); /* Copy persistent encoding data */ vnc_async_encoding_end(job->vs, &vs); } @@ -298,7 +305,7 @@ disconnected: static VncJobQueue *vnc_queue_init(void) { - VncJobQueue *queue = g_malloc0(sizeof(VncJobQueue)); + VncJobQueue *queue = g_new0(VncJobQueue, 1); qemu_cond_init(&queue->cond); qemu_mutex_init(&queue->mutex); @@ -310,7 +317,6 @@ static void vnc_queue_clear(VncJobQueue *q) { qemu_cond_destroy(&queue->cond); qemu_mutex_destroy(&queue->mutex); - buffer_free(&queue->buffer); g_free(q); queue = NULL; /* Unset global queue */ } @@ -156,10 +156,11 @@ char *vnc_socket_remote_addr(const char *format, int fd) { return addr_to_string(format, &sa, salen); } -static VncBasicInfo *vnc_basic_info_get(struct sockaddr_storage *sa, - socklen_t salen) +static void vnc_init_basic_info(struct sockaddr_storage *sa, + socklen_t salen, + VncBasicInfo *info, + Error **errp) { - VncBasicInfo *info; char host[NI_MAXHOST]; char serv[NI_MAXSERV]; int err; @@ -168,42 +169,44 @@ static VncBasicInfo *vnc_basic_info_get(struct sockaddr_storage *sa, host, sizeof(host), serv, sizeof(serv), NI_NUMERICHOST | NI_NUMERICSERV)) != 0) { - VNC_DEBUG("Cannot resolve address %d: %s\n", - err, gai_strerror(err)); - return NULL; + error_setg(errp, "Cannot resolve address: %s", + gai_strerror(err)); + return; } - info = g_malloc0(sizeof(VncBasicInfo)); info->host = g_strdup(host); info->service = g_strdup(serv); info->family = inet_netfamily(sa->ss_family); - return info; } -static VncBasicInfo *vnc_basic_info_get_from_server_addr(int fd) +static void vnc_init_basic_info_from_server_addr(int fd, VncBasicInfo *info, + Error **errp) { struct sockaddr_storage sa; socklen_t salen; salen = sizeof(sa); if (getsockname(fd, (struct sockaddr*)&sa, &salen) < 0) { - return NULL; + error_setg_errno(errp, errno, "getsockname failed"); + return; } - return vnc_basic_info_get(&sa, salen); + vnc_init_basic_info(&sa, salen, info, errp); } -static VncBasicInfo *vnc_basic_info_get_from_remote_addr(int fd) +static void vnc_init_basic_info_from_remote_addr(int fd, VncBasicInfo *info, + Error **errp) { struct sockaddr_storage sa; socklen_t salen; salen = sizeof(sa); if (getpeername(fd, (struct sockaddr*)&sa, &salen) < 0) { - return NULL; + error_setg_errno(errp, errno, "getpeername failed"); + return; } - return vnc_basic_info_get(&sa, salen); + vnc_init_basic_info(&sa, salen, info, errp); } static const char *vnc_auth_name(VncDisplay *vd) { @@ -256,15 +259,18 @@ static const char *vnc_auth_name(VncDisplay *vd) { static VncServerInfo *vnc_server_info_get(VncDisplay *vd) { VncServerInfo *info; - VncBasicInfo *bi = vnc_basic_info_get_from_server_addr(vd->lsock); - if (!bi) { - return NULL; - } + Error *err = NULL; info = g_malloc(sizeof(*info)); - info->base = bi; + vnc_init_basic_info_from_server_addr(vd->lsock, + qapi_VncServerInfo_base(info), &err); info->has_auth = true; info->auth = g_strdup(vnc_auth_name(vd)); + if (err) { + qapi_free_VncServerInfo(info); + info = NULL; + error_free(err); + } return info; } @@ -291,11 +297,16 @@ static void vnc_client_cache_auth(VncState *client) static void vnc_client_cache_addr(VncState *client) { - VncBasicInfo *bi = vnc_basic_info_get_from_remote_addr(client->csock); + Error *err = NULL; - if (bi) { - client->info = g_malloc0(sizeof(*client->info)); - client->info->base = bi; + client->info = g_malloc0(sizeof(*client->info)); + vnc_init_basic_info_from_remote_addr(client->csock, + qapi_VncClientInfo_base(client->info), + &err); + if (err) { + qapi_free_VncClientInfo(client->info); + client->info = NULL; + error_free(err); } } @@ -306,7 +317,6 @@ static void vnc_qmp_event(VncState *vs, QAPIEvent event) if (!vs->info) { return; } - g_assert(vs->info->base); si = vnc_server_info_get(vs->vd); if (!si) { @@ -315,7 +325,8 @@ static void vnc_qmp_event(VncState *vs, QAPIEvent event) switch (event) { case QAPI_EVENT_VNC_CONNECTED: - qapi_event_send_vnc_connected(si, vs->info->base, &error_abort); + qapi_event_send_vnc_connected(si, qapi_VncClientInfo_base(vs->info), + &error_abort); break; case QAPI_EVENT_VNC_INITIALIZED: qapi_event_send_vnc_initialized(si, vs->info, &error_abort); @@ -350,11 +361,10 @@ static VncClientInfo *qmp_query_vnc_client(const VncState *client) } info = g_malloc0(sizeof(*info)); - info->base = g_malloc0(sizeof(*info->base)); - info->base->host = g_strdup(host); - info->base->service = g_strdup(serv); - info->base->family = inet_netfamily(sa.ss_family); - info->base->websocket = client->websocket; + info->host = g_strdup(host); + info->service = g_strdup(serv); + info->family = inet_netfamily(sa.ss_family); + info->websocket = client->websocket; if (client->tls) { info->x509_dname = qcrypto_tls_session_get_peer_name(client->tls); @@ -605,10 +615,25 @@ static void framebuffer_update_request(VncState *vs, int incremental, static void vnc_refresh(DisplayChangeListener *dcl); static int vnc_refresh_server_surface(VncDisplay *vd); +static int vnc_width(VncDisplay *vd) +{ + return MIN(VNC_MAX_WIDTH, ROUND_UP(surface_width(vd->ds), + VNC_DIRTY_PIXELS_PER_BIT)); +} + +static int vnc_height(VncDisplay *vd) +{ + return MIN(VNC_MAX_HEIGHT, surface_height(vd->ds)); +} + static void vnc_set_area_dirty(DECLARE_BITMAP(dirty[VNC_MAX_HEIGHT], VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT), - int width, int height, - int x, int y, int w, int h) { + VncDisplay *vd, + int x, int y, int w, int h) +{ + int width = vnc_width(vd); + int height = vnc_height(vd); + /* this is needed this to ensure we updated all affected * blocks if x % VNC_DIRTY_PIXELS_PER_BIT != 0 */ w += (x % VNC_DIRTY_PIXELS_PER_BIT); @@ -630,10 +655,8 @@ static void vnc_dpy_update(DisplayChangeListener *dcl, { VncDisplay *vd = container_of(dcl, VncDisplay, dcl); struct VncSurface *s = &vd->guest; - int width = pixman_image_get_width(vd->server); - int height = pixman_image_get_height(vd->server); - vnc_set_area_dirty(s->dirty, width, height, x, y, w, h); + vnc_set_area_dirty(s->dirty, vd, x, y, w, h); } void vnc_framebuffer_update(VncState *vs, int x, int y, int w, int h, @@ -703,6 +726,21 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y) return ptr; } +static void vnc_update_server_surface(VncDisplay *vd) +{ + qemu_pixman_image_unref(vd->server); + vd->server = NULL; + + if (QTAILQ_EMPTY(&vd->clients)) { + return; + } + + vd->server = pixman_image_create_bits(VNC_SERVER_FB_FORMAT, + vnc_width(vd), + vnc_height(vd), + NULL, 0); +} + static void vnc_dpy_switch(DisplayChangeListener *dcl, DisplaySurface *surface) { @@ -711,26 +749,19 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl, int width, height; vnc_abort_display_jobs(vd); + vd->ds = surface; /* server surface */ - qemu_pixman_image_unref(vd->server); - vd->ds = surface; - width = MIN(VNC_MAX_WIDTH, ROUND_UP(surface_width(vd->ds), - VNC_DIRTY_PIXELS_PER_BIT)); - height = MIN(VNC_MAX_HEIGHT, surface_height(vd->ds)); - vd->server = pixman_image_create_bits(VNC_SERVER_FB_FORMAT, - width, height, NULL, 0); + vnc_update_server_surface(vd); /* guest surface */ -#if 0 /* FIXME */ - if (ds_get_bytes_per_pixel(ds) != vd->guest.ds->pf.bytes_per_pixel) - console_color_init(ds); -#endif qemu_pixman_image_unref(vd->guest.fb); vd->guest.fb = pixman_image_ref(surface->image); vd->guest.format = surface->format; + width = vnc_width(vd); + height = vnc_height(vd); memset(vd->guest.dirty, 0x00, sizeof(vd->guest.dirty)); - vnc_set_area_dirty(vd->guest.dirty, width, height, 0, 0, + vnc_set_area_dirty(vd->guest.dirty, vd, 0, 0, width, height); QTAILQ_FOREACH(vs, &vd->clients, next) { @@ -740,7 +771,7 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl, vnc_cursor_define(vs); } memset(vs->dirty, 0x00, sizeof(vs->dirty)); - vnc_set_area_dirty(vs->dirty, width, height, 0, 0, + vnc_set_area_dirty(vs->dirty, vd, 0, 0, width, height); } } @@ -830,6 +861,8 @@ int vnc_raw_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) int vnc_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) { int n = 0; + bool encode_raw = false; + size_t saved_offs = vs->output.offset; switch(vs->vnc_encoding) { case VNC_ENCODING_ZLIB: @@ -852,10 +885,24 @@ int vnc_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) n = vnc_zywrle_send_framebuffer_update(vs, x, y, w, h); break; default: - vnc_framebuffer_update(vs, x, y, w, h, VNC_ENCODING_RAW); - n = vnc_raw_send_framebuffer_update(vs, x, y, w, h); + encode_raw = true; break; } + + /* If the client has the same pixel format as our internal buffer and + * a RAW encoding would need less space fall back to RAW encoding to + * save bandwidth and processing power in the client. */ + if (!encode_raw && vs->write_pixels == vnc_write_pixels_copy && + 12 + h * w * VNC_SERVER_FB_BYTES <= (vs->output.offset - saved_offs)) { + vs->output.offset = saved_offs; + encode_raw = true; + } + + if (encode_raw) { + vnc_framebuffer_update(vs, x, y, w, h, VNC_ENCODING_RAW); + n = vnc_raw_send_framebuffer_update(vs, x, y, w, h); + } + return n; } @@ -884,6 +931,11 @@ static void vnc_dpy_copy(DisplayChangeListener *dcl, int i, x, y, pitch, inc, w_lim, s; int cmp_bytes; + if (!vd->server) { + /* no client connected */ + return; + } + vnc_refresh_server_surface(vd); QTAILQ_FOREACH_SAFE(vs, &vd->clients, next, vn) { if (vnc_has_feature(vs, VNC_FEATURE_COPYRECT)) { @@ -1198,6 +1250,10 @@ void vnc_disconnect_finish(VncState *vs) if (vs->initialized) { QTAILQ_REMOVE(&vs->vd->clients, vs, next); qemu_remove_mouse_mode_change_notifier(&vs->mouse_mode_notifier); + if (QTAILQ_EMPTY(&vs->vd->clients)) { + /* last client gone */ + vnc_update_server_surface(vs->vd); + } } if (vs->vd->lock_key_sync) @@ -1980,9 +2036,6 @@ static void ext_key_event(VncState *vs, int down, static void framebuffer_update_request(VncState *vs, int incremental, int x, int y, int w, int h) { - int width = pixman_image_get_width(vs->vd->server); - int height = pixman_image_get_height(vs->vd->server); - vs->need_update = 1; if (incremental) { @@ -1990,7 +2043,7 @@ static void framebuffer_update_request(VncState *vs, int incremental, } vs->force_update = 1; - vnc_set_area_dirty(vs->dirty, width, height, x, y, w, h); + vnc_set_area_dirty(vs->dirty, vs->vd, x, y, w, h); } static void send_ext_key_event_ack(VncState *vs) @@ -2145,15 +2198,15 @@ static void set_pixel_format(VncState *vs, return; } - vs->client_pf.rmax = red_max; + vs->client_pf.rmax = red_max ? red_max : 0xFF; vs->client_pf.rbits = hweight_long(red_max); vs->client_pf.rshift = red_shift; vs->client_pf.rmask = red_max << red_shift; - vs->client_pf.gmax = green_max; + vs->client_pf.gmax = green_max ? green_max : 0xFF; vs->client_pf.gbits = hweight_long(green_max); vs->client_pf.gshift = green_shift; vs->client_pf.gmask = green_max << green_shift; - vs->client_pf.bmax = blue_max; + vs->client_pf.bmax = blue_max ? blue_max : 0xFF; vs->client_pf.bbits = hweight_long(blue_max); vs->client_pf.bshift = blue_shift; vs->client_pf.bmask = blue_max << blue_shift; @@ -2956,12 +3009,32 @@ static void vnc_refresh(DisplayChangeListener *dcl) static void vnc_connect(VncDisplay *vd, int csock, bool skipauth, bool websocket) { - VncState *vs = g_malloc0(sizeof(VncState)); + VncState *vs = g_new0(VncState, 1); int i; vs->csock = csock; vs->vd = vd; + buffer_init(&vs->input, "vnc-input/%d", csock); + buffer_init(&vs->output, "vnc-output/%d", csock); + buffer_init(&vs->ws_input, "vnc-ws_input/%d", csock); + buffer_init(&vs->ws_output, "vnc-ws_output/%d", csock); + buffer_init(&vs->jobs_buffer, "vnc-jobs_buffer/%d", csock); + + buffer_init(&vs->tight.tight, "vnc-tight/%d", csock); + buffer_init(&vs->tight.zlib, "vnc-tight-zlib/%d", csock); + buffer_init(&vs->tight.gradient, "vnc-tight-gradient/%d", csock); +#ifdef CONFIG_VNC_JPEG + buffer_init(&vs->tight.jpeg, "vnc-tight-jpeg/%d", csock); +#endif +#ifdef CONFIG_VNC_PNG + buffer_init(&vs->tight.png, "vnc-tight-png/%d", csock); +#endif + buffer_init(&vs->zlib.zlib, "vnc-zlib/%d", csock); + buffer_init(&vs->zrle.zrle, "vnc-zrle/%d", csock); + buffer_init(&vs->zrle.fb, "vnc-zrle-fb/%d", csock); + buffer_init(&vs->zrle.zlib, "vnc-zrle-zlib/%d", csock); + if (skipauth) { vs->auth = VNC_AUTH_NONE; vs->subauth = VNC_AUTH_INVALID; @@ -2979,7 +3052,7 @@ static void vnc_connect(VncDisplay *vd, int csock, vs->lossy_rect = g_malloc0(VNC_STAT_ROWS * sizeof (*vs->lossy_rect)); for (i = 0; i < VNC_STAT_ROWS; ++i) { - vs->lossy_rect[i] = g_malloc0(VNC_STAT_COLS * sizeof (uint8_t)); + vs->lossy_rect[i] = g_new0(uint8_t, VNC_STAT_COLS); } VNC_DEBUG("New client on socket %d\n", csock); @@ -3019,6 +3092,7 @@ void vnc_init_state(VncState *vs) { vs->initialized = true; VncDisplay *vd = vs->vd; + bool first_client = QTAILQ_EMPTY(&vd->clients); vs->last_x = -1; vs->last_y = -1; @@ -3032,6 +3106,9 @@ void vnc_init_state(VncState *vs) vs->bh = qemu_bh_new(vnc_jobs_bh, vs); QTAILQ_INSERT_TAIL(&vd->clients, vs, next); + if (first_client) { + vnc_update_server_surface(vd); + } graphic_hw_update(vd->dcl.con); @@ -3514,9 +3591,9 @@ void vnc_display_open(const char *id, Error **errp) } if (strncmp(vnc, "unix:", 5) == 0) { - saddr->kind = SOCKET_ADDRESS_KIND_UNIX; - saddr->q_unix = g_new0(UnixSocketAddress, 1); - saddr->q_unix->path = g_strdup(vnc + 5); + saddr->type = SOCKET_ADDRESS_KIND_UNIX; + saddr->u.q_unix = g_new0(UnixSocketAddress, 1); + saddr->u.q_unix->path = g_strdup(vnc + 5); if (vs->ws_enabled) { error_setg(errp, "UNIX sockets not supported with websock"); @@ -3524,12 +3601,12 @@ void vnc_display_open(const char *id, Error **errp) } } else { unsigned long long baseport; - saddr->kind = SOCKET_ADDRESS_KIND_INET; - saddr->inet = g_new0(InetSocketAddress, 1); + saddr->type = SOCKET_ADDRESS_KIND_INET; + saddr->u.inet = g_new0(InetSocketAddress, 1); if (vnc[0] == '[' && vnc[hlen - 1] == ']') { - saddr->inet->host = g_strndup(vnc + 1, hlen - 2); + saddr->u.inet->host = g_strndup(vnc + 1, hlen - 2); } else { - saddr->inet->host = g_strndup(vnc, hlen); + saddr->u.inet->host = g_strndup(vnc, hlen); } if (parse_uint_full(h + 1, &baseport, 10) < 0) { error_setg(errp, "can't convert to a number: %s", h + 1); @@ -3540,28 +3617,28 @@ void vnc_display_open(const char *id, Error **errp) error_setg(errp, "port %s out of range", h + 1); goto fail; } - saddr->inet->port = g_strdup_printf( + saddr->u.inet->port = g_strdup_printf( "%d", (int)baseport + 5900); if (to) { - saddr->inet->has_to = true; - saddr->inet->to = to; + saddr->u.inet->has_to = true; + saddr->u.inet->to = to + 5900; } - saddr->inet->ipv4 = saddr->inet->has_ipv4 = has_ipv4; - saddr->inet->ipv6 = saddr->inet->has_ipv6 = has_ipv6; + saddr->u.inet->ipv4 = saddr->u.inet->has_ipv4 = has_ipv4; + saddr->u.inet->ipv6 = saddr->u.inet->has_ipv6 = has_ipv6; if (vs->ws_enabled) { - wsaddr->kind = SOCKET_ADDRESS_KIND_INET; - wsaddr->inet = g_new0(InetSocketAddress, 1); - wsaddr->inet->host = g_strdup(saddr->inet->host); - wsaddr->inet->port = g_strdup(websocket); + wsaddr->type = SOCKET_ADDRESS_KIND_INET; + wsaddr->u.inet = g_new0(InetSocketAddress, 1); + wsaddr->u.inet->host = g_strdup(saddr->u.inet->host); + wsaddr->u.inet->port = g_strdup(websocket); if (to) { - wsaddr->inet->has_to = true; - wsaddr->inet->to = to; + wsaddr->u.inet->has_to = true; + wsaddr->u.inet->to = to; } - wsaddr->inet->ipv4 = wsaddr->inet->has_ipv4 = has_ipv4; - wsaddr->inet->ipv6 = wsaddr->inet->has_ipv6 = has_ipv6; + wsaddr->u.inet->ipv4 = wsaddr->u.inet->has_ipv4 = has_ipv4; + wsaddr->u.inet->ipv6 = wsaddr->u.inet->has_ipv6 = has_ipv6; } } } else { @@ -3760,7 +3837,7 @@ void vnc_display_open(const char *id, Error **errp) if (csock < 0) { goto fail; } - vs->is_unix = saddr->kind == SOCKET_ADDRESS_KIND_UNIX; + vs->is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; vnc_connect(vs, csock, false, false); } else { /* listen for connects */ @@ -3768,7 +3845,7 @@ void vnc_display_open(const char *id, Error **errp) if (vs->lsock < 0) { goto fail; } - vs->is_unix = saddr->kind == SOCKET_ADDRESS_KIND_UNIX; + vs->is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; if (vs->ws_enabled) { vs->lwebsock = socket_listen(wsaddr, errp); if (vs->lwebsock < 0) { diff --git a/util/Makefile.objs b/util/Makefile.objs index d7cc39907f..89dd80ef86 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -29,3 +29,4 @@ util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o util-obj-y += qemu-coroutine-sleep.o util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o util-obj-y += buffer.o +util-obj-y += timed-average.o diff --git a/util/buffer.c b/util/buffer.c index cedd055680..8b27c08aac 100644 --- a/util/buffer.c +++ b/util/buffer.c @@ -19,12 +19,77 @@ */ #include "qemu/buffer.h" +#include "trace.h" + +#define BUFFER_MIN_INIT_SIZE 4096 +#define BUFFER_MIN_SHRINK_SIZE 65536 + +/* define the factor alpha for the expentional smoothing + * that is used in the average size calculation. a shift + * of 7 results in an alpha of 1/2^7. */ +#define BUFFER_AVG_SIZE_SHIFT 7 + +static size_t buffer_req_size(Buffer *buffer, size_t len) +{ + return MAX(BUFFER_MIN_INIT_SIZE, + pow2ceil(buffer->offset + len)); +} + +static void buffer_adj_size(Buffer *buffer, size_t len) +{ + size_t old = buffer->capacity; + buffer->capacity = buffer_req_size(buffer, len); + buffer->buffer = g_realloc(buffer->buffer, buffer->capacity); + trace_buffer_resize(buffer->name ?: "unnamed", + old, buffer->capacity); + + /* make it even harder for the buffer to shrink, reset average size + * to currenty capacity if it is larger than the average. */ + buffer->avg_size = MAX(buffer->avg_size, + buffer->capacity << BUFFER_AVG_SIZE_SHIFT); +} + +void buffer_init(Buffer *buffer, const char *name, ...) +{ + va_list ap; + + va_start(ap, name); + buffer->name = g_strdup_vprintf(name, ap); + va_end(ap); +} + +static uint64_t buffer_get_avg_size(Buffer *buffer) +{ + return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT; +} + +void buffer_shrink(Buffer *buffer) +{ + size_t new; + + /* Calculate the average size of the buffer as + * avg_size = avg_size * ( 1 - a ) + required_size * a + * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT. */ + buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1; + buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT; + buffer->avg_size += buffer_req_size(buffer, 0); + + /* And then only shrink if the average size of the buffer is much + * too big, to avoid bumping up & down the buffers all the time. + * realloc() isn't exactly cheap ... */ + new = buffer_req_size(buffer, buffer_get_avg_size(buffer)); + if (new < buffer->capacity >> 3 && + new >= BUFFER_MIN_SHRINK_SIZE) { + buffer_adj_size(buffer, buffer_get_avg_size(buffer)); + } + + buffer_adj_size(buffer, 0); +} void buffer_reserve(Buffer *buffer, size_t len) { if ((buffer->capacity - buffer->offset) < len) { - buffer->capacity += (len + 1024); - buffer->buffer = g_realloc(buffer->buffer, buffer->capacity); + buffer_adj_size(buffer, len); } } @@ -41,14 +106,18 @@ uint8_t *buffer_end(Buffer *buffer) void buffer_reset(Buffer *buffer) { buffer->offset = 0; + buffer_shrink(buffer); } void buffer_free(Buffer *buffer) { + trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity); g_free(buffer->buffer); + g_free(buffer->name); buffer->offset = 0; buffer->capacity = 0; buffer->buffer = NULL; + buffer->name = NULL; } void buffer_append(Buffer *buffer, const void *data, size_t len) @@ -62,4 +131,41 @@ void buffer_advance(Buffer *buffer, size_t len) memmove(buffer->buffer, buffer->buffer + len, (buffer->offset - len)); buffer->offset -= len; + buffer_shrink(buffer); +} + +void buffer_move_empty(Buffer *to, Buffer *from) +{ + trace_buffer_move_empty(to->name ?: "unnamed", + from->offset, + from->name ?: "unnamed"); + assert(to->offset == 0); + + g_free(to->buffer); + to->offset = from->offset; + to->capacity = from->capacity; + to->buffer = from->buffer; + + from->offset = 0; + from->capacity = 0; + from->buffer = NULL; +} + +void buffer_move(Buffer *to, Buffer *from) +{ + if (to->offset == 0) { + buffer_move_empty(to, from); + return; + } + + trace_buffer_move(to->name ?: "unnamed", + from->offset, + from->name ?: "unnamed"); + buffer_reserve(to, from->offset); + buffer_append(to, from->buffer, from->offset); + + g_free(from->buffer); + from->offset = 0; + from->capacity = 0; + from->buffer = NULL; } diff --git a/util/error.c b/util/error.c index 8b86490ba1..80c89a2079 100644 --- a/util/error.c +++ b/util/error.c @@ -220,6 +220,13 @@ void error_free(Error *err) } } +void error_free_or_abort(Error **errp) +{ + assert(errp && *errp); + error_free(*errp); + *errp = NULL; +} + void error_propagate(Error **dst_errp, Error *local_err) { if (!local_err) { @@ -29,7 +29,7 @@ bool id_wellformed(const char *id) #define ID_SPECIAL_CHAR '#' -static const char *const id_subsys_str[] = { +static const char *const id_subsys_str[ID_MAX] = { [ID_QDEV] = "qdev", [ID_BLOCK] = "block", }; @@ -53,7 +53,7 @@ char *id_generate(IdSubSystems id) static uint64_t id_counters[ID_MAX]; uint32_t rnd; - assert(id < ID_MAX); + assert(id < ARRAY_SIZE(id_subsys_str)); assert(id_subsys_str[id]); rnd = g_random_int_range(0, 100); diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c index 13942694cc..54793a5dcf 100644 --- a/util/mmap-alloc.c +++ b/util/mmap-alloc.c @@ -14,6 +14,32 @@ #include <sys/mman.h> #include <assert.h> +#define HUGETLBFS_MAGIC 0x958458f6 + +#ifdef CONFIG_LINUX +#include <sys/vfs.h> +#endif + +size_t qemu_fd_getpagesize(int fd) +{ +#ifdef CONFIG_LINUX + struct statfs fs; + int ret; + + if (fd != -1) { + do { + ret = fstatfs(fd, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) { + return fs.f_bsize; + } + } +#endif + + return getpagesize(); +} + void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) { /* @@ -21,12 +47,25 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) * space, even if size is already aligned. */ size_t total = size + align; +#if defined(__powerpc64__) && defined(__linux__) + /* On ppc64 mappings in the same segment (aka slice) must share the same + * page size. Since we will be re-allocating part of this segment + * from the supplied fd, we should make sure to use the same page size, + * unless we are using the system page size, in which case anonymous memory + * is OK. Use align as a hint for the page size. + * In this case, set MAP_NORESERVE to avoid allocating backing store memory. + */ + int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd; + int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE; + void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0); +#else void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +#endif size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; void *ptr1; if (ptr == MAP_FAILED) { - return NULL; + return MAP_FAILED; } /* Make sure align is a power of 2 */ @@ -41,7 +80,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) fd, 0); if (ptr1 == MAP_FAILED) { munmap(ptr, total); - return NULL; + return MAP_FAILED; } ptr += offset; diff --git a/util/osdep.c b/util/osdep.c index 0092bb61b9..534b51147c 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -52,7 +52,14 @@ extern int madvise(caddr_t, size_t, int); static bool fips_enabled = false; -static const char *qemu_version = QEMU_VERSION; +/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default + * instead of QEMU_VERSION, so setting hw_version on MachineClass + * is no longer mandatory. + * + * Do NOT change this string, or it will break compatibility on all + * machine classes that don't set hw_version. + */ +static const char *hw_version = "2.5+"; int socket_set_cork(int fd, int v) { @@ -311,14 +318,14 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen) return ret; } -void qemu_set_version(const char *version) +void qemu_set_hw_version(const char *version) { - qemu_version = version; + hw_version = version; } -const char *qemu_get_version(void) +const char *qemu_hw_version(void) { - return qemu_version; + return hw_version; } void fips_set_state(bool requested) diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 914cef5c2c..d25f6715c7 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -46,7 +46,6 @@ extern int daemon(int, int); #else # define QEMU_VMALLOC_ALIGN getpagesize() #endif -#define HUGETLBFS_MAGIC 0x958458f6 #include <termios.h> #include <unistd.h> @@ -65,7 +64,6 @@ extern int daemon(int, int); #ifdef CONFIG_LINUX #include <sys/syscall.h> -#include <sys/vfs.h> #endif #ifdef __FreeBSD__ @@ -340,26 +338,6 @@ static void sigbus_handler(int signal) siglongjmp(sigjump, 1); } -static size_t fd_getpagesize(int fd) -{ -#ifdef CONFIG_LINUX - struct statfs fs; - int ret; - - if (fd != -1) { - do { - ret = fstatfs(fd, &fs); - } while (ret != 0 && errno == EINTR); - - if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) { - return fs.f_bsize; - } - } -#endif - - return getpagesize(); -} - void os_mem_prealloc(int fd, char *area, size_t memory) { int ret; @@ -387,7 +365,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory) exit(1); } else { int i; - size_t hpagesize = fd_getpagesize(fd); + size_t hpagesize = qemu_fd_getpagesize(fd); size_t numpages = DIV_ROUND_UP(memory, hpagesize); /* MAP_POPULATE silently ignores failures */ diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 09f9e98a40..6a47019dfb 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -454,7 +454,7 @@ gint g_poll(GPollFD *fds, guint nfds, gint timeout) return retval; } -size_t getpagesize(void) +int getpagesize(void) { SYSTEM_INFO system_info; diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 9142917be5..5a31d164d9 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -751,8 +751,7 @@ int unix_listen_opts(QemuOpts *opts, Error **errp) qemu_opt_set(opts, "path", un.sun_path, &error_abort); } - if ((access(un.sun_path, F_OK) == 0) && - unlink(un.sun_path) < 0) { + if (unlink(un.sun_path) < 0 && errno != ENOENT) { error_setg_errno(errp, errno, "Failed to unlink socket %s", un.sun_path); goto err; @@ -918,23 +917,23 @@ SocketAddress *socket_parse(const char *str, Error **errp) error_setg(errp, "invalid Unix socket address"); goto fail; } else { - addr->kind = SOCKET_ADDRESS_KIND_UNIX; - addr->q_unix = g_new(UnixSocketAddress, 1); - addr->q_unix->path = g_strdup(str + 5); + addr->type = SOCKET_ADDRESS_KIND_UNIX; + addr->u.q_unix = g_new(UnixSocketAddress, 1); + addr->u.q_unix->path = g_strdup(str + 5); } } else if (strstart(str, "fd:", NULL)) { if (str[3] == '\0') { error_setg(errp, "invalid file descriptor address"); goto fail; } else { - addr->kind = SOCKET_ADDRESS_KIND_FD; - addr->fd = g_new(String, 1); - addr->fd->str = g_strdup(str + 3); + addr->type = SOCKET_ADDRESS_KIND_FD; + addr->u.fd = g_new(String, 1); + addr->u.fd->str = g_strdup(str + 3); } } else { - addr->kind = SOCKET_ADDRESS_KIND_INET; - addr->inet = inet_parse(str, errp); - if (addr->inet == NULL) { + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet = inet_parse(str, errp); + if (addr->u.inet == NULL) { goto fail; } } @@ -952,19 +951,19 @@ int socket_connect(SocketAddress *addr, Error **errp, int fd; opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - switch (addr->kind) { + switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: - inet_addr_to_opts(opts, addr->inet); + inet_addr_to_opts(opts, addr->u.inet); fd = inet_connect_opts(opts, errp, callback, opaque); break; case SOCKET_ADDRESS_KIND_UNIX: - qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort); + qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort); fd = unix_connect_opts(opts, errp, callback, opaque); break; case SOCKET_ADDRESS_KIND_FD: - fd = monitor_get_fd(cur_mon, addr->fd->str, errp); + fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp); if (fd >= 0 && callback) { qemu_set_nonblock(fd); callback(fd, NULL, opaque); @@ -984,19 +983,19 @@ int socket_listen(SocketAddress *addr, Error **errp) int fd; opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - switch (addr->kind) { + switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: - inet_addr_to_opts(opts, addr->inet); + inet_addr_to_opts(opts, addr->u.inet); fd = inet_listen_opts(opts, 0, errp); break; case SOCKET_ADDRESS_KIND_UNIX: - qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort); + qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort); fd = unix_listen_opts(opts, errp); break; case SOCKET_ADDRESS_KIND_FD: - fd = monitor_get_fd(cur_mon, addr->fd->str, errp); + fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp); break; default: @@ -1012,12 +1011,12 @@ int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp) int fd; opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - switch (remote->kind) { + switch (remote->type) { case SOCKET_ADDRESS_KIND_INET: - inet_addr_to_opts(opts, remote->inet); + inet_addr_to_opts(opts, remote->u.inet); if (local) { - qemu_opt_set(opts, "localaddr", local->inet->host, &error_abort); - qemu_opt_set(opts, "localport", local->inet->port, &error_abort); + qemu_opt_set(opts, "localaddr", local->u.inet->host, &error_abort); + qemu_opt_set(opts, "localport", local->u.inet->port, &error_abort); } fd = inet_dgram_opts(opts, errp); break; @@ -1052,14 +1051,14 @@ socket_sockaddr_to_address_inet(struct sockaddr_storage *sa, } addr = g_new0(SocketAddress, 1); - addr->kind = SOCKET_ADDRESS_KIND_INET; - addr->inet = g_new0(InetSocketAddress, 1); - addr->inet->host = g_strdup(host); - addr->inet->port = g_strdup(serv); + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet = g_new0(InetSocketAddress, 1); + addr->u.inet->host = g_strdup(host); + addr->u.inet->port = g_strdup(serv); if (sa->ss_family == AF_INET) { - addr->inet->has_ipv4 = addr->inet->ipv4 = true; + addr->u.inet->has_ipv4 = addr->u.inet->ipv4 = true; } else { - addr->inet->has_ipv6 = addr->inet->ipv6 = true; + addr->u.inet->has_ipv6 = addr->u.inet->ipv6 = true; } return addr; @@ -1076,11 +1075,11 @@ socket_sockaddr_to_address_unix(struct sockaddr_storage *sa, struct sockaddr_un *su = (struct sockaddr_un *)sa; addr = g_new0(SocketAddress, 1); - addr->kind = SOCKET_ADDRESS_KIND_UNIX; - addr->q_unix = g_new0(UnixSocketAddress, 1); + addr->type = SOCKET_ADDRESS_KIND_UNIX; + addr->u.q_unix = g_new0(UnixSocketAddress, 1); if (su->sun_path[0]) { - addr->q_unix->path = g_strndup(su->sun_path, - sizeof(su->sun_path)); + addr->u.q_unix->path = g_strndup(su->sun_path, + sizeof(su->sun_path)); } return addr; diff --git a/util/timed-average.c b/util/timed-average.c new file mode 100644 index 0000000000..a2dfb4834d --- /dev/null +++ b/util/timed-average.c @@ -0,0 +1,231 @@ +/* + * QEMU timed average computation + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free sofware: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Sofware Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <string.h> + +#include "qemu/timed-average.h" + +/* This module computes an average of a set of values within a time + * window. + * + * Algorithm: + * + * - Create two windows with a certain expiration period, and + * offsetted by period / 2. + * - Each time you want to account a new value, do it in both windows. + * - The minimum / maximum / average values are always returned from + * the oldest window. + * + * Example: + * + * t=0 |t=0.5 |t=1 |t=1.5 |t=2 + * wnd0: [0,0.5)|wnd0: [0.5,1.5) | |wnd0: [1.5,2.5) | + * wnd1: [0,1) | |wnd1: [1,2) | | + * + * Values are returned from: + * + * wnd0---------|wnd1------------|wnd0---------|wnd1-------------| + */ + +/* Update the expiration of a time window + * + * @w: the window used + * @now: the current time in nanoseconds + * @period: the expiration period in nanoseconds + */ +static void update_expiration(TimedAverageWindow *w, int64_t now, + int64_t period) +{ + /* time elapsed since the last theoretical expiration */ + int64_t elapsed = (now - w->expiration) % period; + /* time remaininging until the next expiration */ + int64_t remaining = period - elapsed; + /* compute expiration */ + w->expiration = now + remaining; +} + +/* Reset a window + * + * @w: the window to reset + */ +static void window_reset(TimedAverageWindow *w) +{ + w->min = UINT64_MAX; + w->max = 0; + w->sum = 0; + w->count = 0; +} + +/* Get the current window (that is, the one with the earliest + * expiration time). + * + * @ta: the TimedAverage structure + * @ret: a pointer to the current window + */ +static TimedAverageWindow *current_window(TimedAverage *ta) +{ + return &ta->windows[ta->current]; +} + +/* Initialize a TimedAverage structure + * + * @ta: the TimedAverage structure + * @clock_type: the type of clock to use + * @period: the time window period in nanoseconds + */ +void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, + uint64_t period) +{ + int64_t now = qemu_clock_get_ns(clock_type); + + /* Returned values are from the oldest window, so they belong to + * the interval [ta->period/2,ta->period). By adjusting the + * requested period by 4/3, we guarantee that they're in the + * interval [2/3 period,4/3 period), closer to the requested + * period on average */ + ta->period = (uint64_t) period * 4 / 3; + ta->clock_type = clock_type; + ta->current = 0; + + window_reset(&ta->windows[0]); + window_reset(&ta->windows[1]); + + /* Both windows are offsetted by half a period */ + ta->windows[0].expiration = now + ta->period / 2; + ta->windows[1].expiration = now + ta->period; +} + +/* Check if the time windows have expired, updating their counters and + * expiration time if that's the case. + * + * @ta: the TimedAverage structure + * @elapsed: if non-NULL, the elapsed time (in ns) within the current + * window will be stored here + */ +static void check_expirations(TimedAverage *ta, uint64_t *elapsed) +{ + int64_t now = qemu_clock_get_ns(ta->clock_type); + int i; + + assert(ta->period != 0); + + /* Check if the windows have expired */ + for (i = 0; i < 2; i++) { + TimedAverageWindow *w = &ta->windows[i]; + if (w->expiration <= now) { + window_reset(w); + update_expiration(w, now, ta->period); + } + } + + /* Make ta->current point to the oldest window */ + if (ta->windows[0].expiration < ta->windows[1].expiration) { + ta->current = 0; + } else { + ta->current = 1; + } + + /* Calculate the elapsed time within the current window */ + if (elapsed) { + int64_t remaining = ta->windows[ta->current].expiration - now; + *elapsed = ta->period - remaining; + } +} + +/* Account a value + * + * @ta: the TimedAverage structure + * @value: the value to account + */ +void timed_average_account(TimedAverage *ta, uint64_t value) +{ + int i; + check_expirations(ta, NULL); + + /* Do the accounting in both windows at the same time */ + for (i = 0; i < 2; i++) { + TimedAverageWindow *w = &ta->windows[i]; + + w->sum += value; + w->count++; + + if (value < w->min) { + w->min = value; + } + + if (value > w->max) { + w->max = value; + } + } +} + +/* Get the minimum value + * + * @ta: the TimedAverage structure + * @ret: the minimum value + */ +uint64_t timed_average_min(TimedAverage *ta) +{ + TimedAverageWindow *w; + check_expirations(ta, NULL); + w = current_window(ta); + return w->min < UINT64_MAX ? w->min : 0; +} + +/* Get the average value + * + * @ta: the TimedAverage structure + * @ret: the average value + */ +uint64_t timed_average_avg(TimedAverage *ta) +{ + TimedAverageWindow *w; + check_expirations(ta, NULL); + w = current_window(ta); + return w->count > 0 ? w->sum / w->count : 0; +} + +/* Get the maximum value + * + * @ta: the TimedAverage structure + * @ret: the maximum value + */ +uint64_t timed_average_max(TimedAverage *ta) +{ + check_expirations(ta, NULL); + return current_window(ta)->max; +} + +/* Get the sum of all accounted values + * @ta: the TimedAverage structure + * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here + * @ret: the sum of all accounted values + */ +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed) +{ + TimedAverageWindow *w; + check_expirations(ta, elapsed); + w = current_window(ta); + return w->sum; +} @@ -122,6 +122,8 @@ int main(int argc, char **argv) #include "qapi-event.h" #include "exec/semihost.h" #include "crypto/init.h" +#include "sysemu/replay.h" +#include "qapi/qmp/qerror.h" #define MAX_VIRTIO_CONSOLES 1 #define MAX_SCLP_CONSOLES 1 @@ -474,6 +476,12 @@ static QemuOptsList qemu_icount_opts = { }, { .name = "sleep", .type = QEMU_OPT_BOOL, + }, { + .name = "rr", + .type = QEMU_OPT_STRING, + }, { + .name = "rrfile", + .type = QEMU_OPT_STRING, }, { /* end of list */ } }, @@ -674,9 +682,9 @@ void runstate_set(RunState new_state) assert(new_state < RUN_STATE_MAX); if (!runstate_valid_transitions[current_run_state][new_state]) { - fprintf(stderr, "ERROR: invalid runstate transition: '%s' -> '%s'\n", - RunState_lookup[current_run_state], - RunState_lookup[new_state]); + error_report("invalid runstate transition: '%s' -> '%s'", + RunState_lookup[current_run_state], + RunState_lookup[new_state]); abort(); } trace_runstate_set(new_state); @@ -828,8 +836,9 @@ static void configure_rtc_date_offset(const char *startdate, int legacy) rtc_start_date = mktimegm(&tm); if (rtc_start_date == -1) { date_fail: - fprintf(stderr, "Invalid date format. Valid formats are:\n" - "'2006-06-17T16:01:21' or '2006-06-17'\n"); + error_report("invalid date format"); + error_printf("valid formats: " + "'2006-06-17T16:01:21' or '2006-06-17'\n"); exit(1); } rtc_date_offset = qemu_time() - rtc_start_date; @@ -845,7 +854,11 @@ static void configure_rtc(QemuOpts *opts) if (!strcmp(value, "utc")) { rtc_utc = 1; } else if (!strcmp(value, "localtime")) { + Error *blocker = NULL; rtc_utc = 0; + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, + "-rtc base=localtime"); + replay_add_blocker(blocker); } else { configure_rtc_date_offset(value, 0); } @@ -859,7 +872,7 @@ static void configure_rtc(QemuOpts *opts) } else if (!strcmp(value, "vm")) { rtc_clock = QEMU_CLOCK_VIRTUAL; } else { - fprintf(stderr, "qemu: invalid option value '%s'\n", value); + error_report("invalid option value '%s'", value); exit(1); } } @@ -879,7 +892,7 @@ static void configure_rtc(QemuOpts *opts) } else if (!strcmp(value, "none")) { /* discard is default */ } else { - fprintf(stderr, "qemu: invalid option value '%s'\n", value); + error_report("invalid option value '%s'", value); exit(1); } } @@ -905,7 +918,7 @@ static int bt_hci_parse(const char *str) bdaddr_t bdaddr; if (nb_hcis >= MAX_NICS) { - fprintf(stderr, "qemu: Too many bluetooth HCIs (max %i).\n", MAX_NICS); + error_report("too many bluetooth HCIs (max %i)", MAX_NICS); return -1; } @@ -931,8 +944,8 @@ static void bt_vhci_add(int vlan_id) struct bt_scatternet_s *vlan = qemu_find_bt_vlan(vlan_id); if (!vlan->slave) - fprintf(stderr, "qemu: warning: adding a VHCI to " - "an empty scatternet %i\n", vlan_id); + error_report("warning: adding a VHCI to an empty scatternet %i", + vlan_id); bt_vhci_init(bt_new_hci(vlan)); } @@ -950,7 +963,7 @@ static struct bt_device_s *bt_device_add(const char *opt) if (endp) { vlan_id = strtol(endp + 6, &endp, 0); if (*endp) { - fprintf(stderr, "qemu: unrecognised bluetooth vlan Id\n"); + error_report("unrecognised bluetooth vlan Id"); return 0; } } @@ -958,13 +971,13 @@ static struct bt_device_s *bt_device_add(const char *opt) vlan = qemu_find_bt_vlan(vlan_id); if (!vlan->slave) - fprintf(stderr, "qemu: warning: adding a slave device to " - "an empty scatternet %i\n", vlan_id); + error_report("warning: adding a slave device to an empty scatternet %i", + vlan_id); if (!strcmp(devname, "keyboard")) return bt_keyboard_init(vlan); - fprintf(stderr, "qemu: unsupported bluetooth device `%s'\n", devname); + error_report("unsupported bluetooth device '%s'", devname); return 0; } @@ -987,11 +1000,11 @@ static int bt_parse(const char *opt) if (strstart(endp, ",vlan=", &p)) { vlan = strtol(p, (char **) &endp, 0); if (*endp) { - fprintf(stderr, "qemu: bad scatternet '%s'\n", p); + error_report("bad scatternet '%s'", p); return 1; } } else { - fprintf(stderr, "qemu: bad parameter '%s'\n", endp + 1); + error_report("bad parameter '%s'", endp + 1); return 1; } } else @@ -1003,7 +1016,7 @@ static int bt_parse(const char *opt) } else if (strstart(opt, "device:", &endp)) return !bt_device_add(endp); - fprintf(stderr, "qemu: bad bluetooth parameter '%s'\n", opt); + error_report("bad bluetooth parameter '%s'", opt); return 1; } @@ -1018,8 +1031,7 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) return -1; } #else - error_report("sandboxing request but seccomp is not compiled " - "into this build"); + error_report("seccomp support is disabled"); return -1; #endif } @@ -1100,7 +1112,7 @@ static int parse_add_fd(void *opaque, QemuOpts *opts, Error **errp) } #endif if (dupfd == -1) { - error_report("Error duplicating fd: %s", strerror(errno)); + error_report("error duplicating fd: %s", strerror(errno)); return -1; } @@ -1220,18 +1232,19 @@ static void smp_parse(QemuOpts *opts) } else if (threads == 0) { threads = cpus / (cores * sockets); } else if (sockets * cores * threads < cpus) { - fprintf(stderr, "cpu topology: error: " - "sockets (%u) * cores (%u) * threads (%u) < " - "smp_cpus (%u)\n", - sockets, cores, threads, cpus); + error_report("cpu topology: " + "sockets (%u) * cores (%u) * threads (%u) < " + "smp_cpus (%u)", + sockets, cores, threads, cpus); exit(1); } max_cpus = qemu_opt_get_number(opts, "maxcpus", cpus); if (sockets * cores * threads > max_cpus) { - fprintf(stderr, "cpu topology: error: " - "sockets (%u) * cores (%u) * threads (%u) > maxcpus (%u)\n", - sockets, cores, threads, max_cpus); + error_report("cpu topology: " + "sockets (%u) * cores (%u) * threads (%u) > " + "maxcpus (%u)", + sockets, cores, threads, max_cpus); exit(1); } @@ -1246,21 +1259,26 @@ static void smp_parse(QemuOpts *opts) } if (max_cpus > MAX_CPUMASK_BITS) { - fprintf(stderr, "Unsupported number of maxcpus\n"); + error_report("unsupported number of maxcpus"); exit(1); } if (max_cpus < smp_cpus) { - fprintf(stderr, "maxcpus must be equal to or greater than smp\n"); + error_report("maxcpus must be equal to or greater than smp"); exit(1); } + if (smp_cpus > 1 || smp_cores > 1 || smp_threads > 1) { + Error *blocker = NULL; + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); + replay_add_blocker(blocker); + } } static void realtime_init(void) { if (enable_mlock) { if (os_mlock() < 0) { - fprintf(stderr, "qemu: locking memory failed\n"); + error_report("locking memory failed"); exit(1); } } @@ -1414,7 +1432,7 @@ static int usb_parse(const char *cmdline) int r; r = usb_device_add(cmdline); if (r < 0) { - fprintf(stderr, "qemu: could not add USB device '%s'\n", cmdline); + error_report("could not add USB device '%s'", cmdline); } return r; } @@ -1518,12 +1536,14 @@ MachineInfoList *qmp_query_machines(Error **errp) static int machine_help_func(QemuOpts *opts, MachineState *machine) { ObjectProperty *prop; + ObjectPropertyIterator *iter; if (!qemu_opt_has_help_opt(opts)) { return 0; } - QTAILQ_FOREACH(prop, &OBJECT(machine)->properties, node) { + iter = object_property_iter_init(OBJECT(machine)); + while ((prop = object_property_iter_next(iter))) { if (!prop->set) { continue; } @@ -1536,6 +1556,7 @@ static int machine_help_func(QemuOpts *opts, MachineState *machine) error_printf("\n"); } } + object_property_iter_free(iter); return 1; } @@ -1624,14 +1645,14 @@ static int qemu_shutdown_requested(void) static void qemu_kill_report(void) { if (!qtest_driver() && shutdown_signal != -1) { - fprintf(stderr, "qemu: terminating on signal %d", shutdown_signal); if (shutdown_pid == 0) { /* This happens for eg ^C at the terminal, so it's worth * avoiding printing an odd message in that case. */ - fputc('\n', stderr); + error_report("terminating on signal %d", shutdown_signal); } else { - fprintf(stderr, " from pid " FMT_pid "\n", shutdown_pid); + error_report("terminating on signal %d from pid " FMT_pid, + shutdown_signal, shutdown_pid); } shutdown_signal = -1; } @@ -1640,15 +1661,21 @@ static void qemu_kill_report(void) static int qemu_reset_requested(void) { int r = reset_requested; - reset_requested = 0; - return r; + if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) { + reset_requested = 0; + return r; + } + return false; } static int qemu_suspend_requested(void) { int r = suspend_requested; - suspend_requested = 0; - return r; + if (r && replay_checkpoint(CHECKPOINT_SUSPEND_REQUESTED)) { + suspend_requested = 0; + return r; + } + return false; } static WakeupReason qemu_wakeup_requested(void) @@ -1796,12 +1823,18 @@ void qemu_system_killed(int signal, pid_t pid) shutdown_signal = signal; shutdown_pid = pid; no_shutdown = 0; - qemu_system_shutdown_request(); + + /* Cannot call qemu_system_shutdown_request directly because + * we are in a signal handler. + */ + shutdown_requested = 1; + qemu_notify_event(); } void qemu_system_shutdown_request(void) { trace_qemu_system_shutdown_request(); + replay_shutdown_request(); shutdown_requested = 1; qemu_notify_event(); } @@ -1980,28 +2013,28 @@ static void select_vgahw (const char *p) if (vga_available()) { vga_interface_type = VGA_STD; } else { - fprintf(stderr, "Error: standard VGA not available\n"); + error_report("standard VGA not available"); exit(0); } } else if (strstart(p, "cirrus", &opts)) { if (cirrus_vga_available()) { vga_interface_type = VGA_CIRRUS; } else { - fprintf(stderr, "Error: Cirrus VGA not available\n"); + error_report("Cirrus VGA not available"); exit(0); } } else if (strstart(p, "vmware", &opts)) { if (vmware_vga_available()) { vga_interface_type = VGA_VMWARE; } else { - fprintf(stderr, "Error: VMWare SVGA not available\n"); + error_report("VMWare SVGA not available"); exit(0); } } else if (strstart(p, "virtio", &opts)) { if (virtio_vga_available()) { vga_interface_type = VGA_VIRTIO; } else { - fprintf(stderr, "Error: Virtio VGA not available\n"); + error_report("Virtio VGA not available"); exit(0); } } else if (strstart(p, "xenfb", &opts)) { @@ -2010,26 +2043,26 @@ static void select_vgahw (const char *p) if (qxl_vga_available()) { vga_interface_type = VGA_QXL; } else { - fprintf(stderr, "Error: QXL VGA not available\n"); + error_report("QXL VGA not available"); exit(0); } } else if (strstart(p, "tcx", &opts)) { if (tcx_vga_available()) { vga_interface_type = VGA_TCX; } else { - fprintf(stderr, "Error: TCX framebuffer not available\n"); + error_report("TCX framebuffer not available"); exit(0); } } else if (strstart(p, "cg3", &opts)) { if (cg3_vga_available()) { vga_interface_type = VGA_CG3; } else { - fprintf(stderr, "Error: CG3 framebuffer not available\n"); + error_report("CG3 framebuffer not available"); exit(0); } } else if (!strstart(p, "none", &opts)) { invalid_vga: - fprintf(stderr, "Unknown vga type: %s\n", p); + error_report("unknown vga type: %s", p); exit(1); } while (*opts) { @@ -2105,7 +2138,7 @@ static DisplayType select_display(const char *p) } } else { invalid_sdl_args: - error_report("Invalid SDL option string"); + error_report("invalid SDL option string"); exit(1); } opts = nextopt; @@ -2134,7 +2167,7 @@ static DisplayType select_display(const char *p) #ifdef CONFIG_CURSES display = DT_CURSES; #else - error_report("Curses support is disabled"); + error_report("curses support is disabled"); exit(1); #endif } else if (strstart(p, "gtk", &opts)) { @@ -2163,7 +2196,7 @@ static DisplayType select_display(const char *p) } } else { invalid_gtk_args: - error_report("Invalid GTK option string"); + error_report("invalid GTK option string"); exit(1); } opts = nextopt; @@ -2175,7 +2208,7 @@ static DisplayType select_display(const char *p) } else if (strstart(p, "none", &opts)) { display = DT_NONE; } else { - error_report("Unknown display type"); + error_report("unknown display type"); exit(1); } @@ -2276,8 +2309,8 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp) return -1; } if (strncmp(name, "opt/", 4) != 0) { - error_report("WARNING: externally provided fw_cfg item names " - "should be prefixed with \"opt/\"!"); + error_report("warning: externally provided fw_cfg item names " + "should be prefixed with \"opt/\""); } if (nonempty_str(str)) { size = strlen(str); /* NUL terminator NOT included in fw_cfg blob */ @@ -2349,7 +2382,7 @@ static int mon_init_func(void *opaque, QemuOpts *opts, Error **errp) } else if (strcmp(mode, "control") == 0) { flags = MONITOR_USE_CONTROL; } else { - fprintf(stderr, "unknown monitor mode \"%s\"\n", mode); + error_report("unknown monitor mode \"%s\"", mode); exit(1); } @@ -2362,7 +2395,7 @@ static int mon_init_func(void *opaque, QemuOpts *opts, Error **errp) chardev = qemu_opt_get(opts, "chardev"); chr = qemu_chr_find(chardev); if (chr == NULL) { - fprintf(stderr, "chardev \"%s\" not found\n", chardev); + error_report("chardev \"%s\" not found", chardev); exit(1); } @@ -2390,7 +2423,7 @@ static void monitor_parse(const char *optarg, const char *mode, bool pretty) } opts = qemu_chr_parse_compat(label, optarg); if (!opts) { - fprintf(stderr, "parse error: %s\n", optarg); + error_report("parse error: %s", optarg); exit(1); } } @@ -2464,14 +2497,14 @@ static int serial_parse(const char *devname) if (strcmp(devname, "none") == 0) return 0; if (index == MAX_SERIAL_PORTS) { - fprintf(stderr, "qemu: too many serial ports\n"); + error_report("too many serial ports"); exit(1); } snprintf(label, sizeof(label), "serial%d", index); serial_hds[index] = qemu_chr_new(label, devname, NULL); if (!serial_hds[index]) { - fprintf(stderr, "qemu: could not connect serial device" - " to character backend '%s'\n", devname); + error_report("could not connect serial device" + " to character backend '%s'", devname); return -1; } index++; @@ -2486,14 +2519,14 @@ static int parallel_parse(const char *devname) if (strcmp(devname, "none") == 0) return 0; if (index == MAX_PARALLEL_PORTS) { - fprintf(stderr, "qemu: too many parallel ports\n"); + error_report("too many parallel ports"); exit(1); } snprintf(label, sizeof(label), "parallel%d", index); parallel_hds[index] = qemu_chr_new(label, devname, NULL); if (!parallel_hds[index]) { - fprintf(stderr, "qemu: could not connect parallel device" - " to character backend '%s'\n", devname); + error_report("could not connect parallel device" + " to character backend '%s'", devname); return -1; } index++; @@ -2510,7 +2543,7 @@ static int virtcon_parse(const char *devname) if (strcmp(devname, "none") == 0) return 0; if (index == MAX_VIRTIO_CONSOLES) { - fprintf(stderr, "qemu: too many virtio consoles\n"); + error_report("too many virtio consoles"); exit(1); } @@ -2527,8 +2560,8 @@ static int virtcon_parse(const char *devname) snprintf(label, sizeof(label), "virtcon%d", index); virtcon_hds[index] = qemu_chr_new(label, devname, NULL); if (!virtcon_hds[index]) { - fprintf(stderr, "qemu: could not connect virtio console" - " to character backend '%s'\n", devname); + error_report("could not connect virtio console" + " to character backend '%s'", devname); return -1; } qemu_opt_set(dev_opts, "chardev", label, &error_abort); @@ -2548,7 +2581,7 @@ static int sclp_parse(const char *devname) return 0; } if (index == MAX_SCLP_CONSOLES) { - fprintf(stderr, "qemu: too many sclp consoles\n"); + error_report("too many sclp consoles"); exit(1); } @@ -2560,8 +2593,8 @@ static int sclp_parse(const char *devname) snprintf(label, sizeof(label), "sclpcon%d", index); sclp_hds[index] = qemu_chr_new(label, devname, NULL); if (!sclp_hds[index]) { - fprintf(stderr, "qemu: could not connect sclp console" - " to character backend '%s'\n", devname); + error_report("could not connect sclp console" + " to character backend '%s'", devname); return -1; } qemu_opt_set(dev_opts, "chardev", label, &error_abort); @@ -2579,7 +2612,7 @@ static int debugcon_parse(const char *devname) } opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1, NULL); if (!opts) { - fprintf(stderr, "qemu: already have a debugcon device\n"); + error_report("already have a debugcon device"); exit(1); } qemu_opt_set(opts, "driver", "isa-debugcon", &error_abort); @@ -2634,8 +2667,8 @@ static gint machine_class_cmp(gconstpointer a, gconstpointer b) return mc; } if (name && !is_help_option(name)) { - error_report("Unsupported machine type"); - error_printf("Use -machine help to list supported machines!\n"); + error_report("unsupported machine type"); + error_printf("Use -machine help to list supported machines\n"); } else { printf("Supported machines are:\n"); machines = g_slist_sort(machines, machine_class_cmp); @@ -3010,8 +3043,7 @@ int main(int argc, char **argv, char **envp) runstate_init(); if (qcrypto_init(&err) < 0) { - fprintf(stderr, "Cannot initialize crypto: %s\n", - error_get_pretty(err)); + error_report("cannot initialize crypto: %s", error_get_pretty(err)); exit(1); } rtc_clock = QEMU_CLOCK_HOST; @@ -3169,7 +3201,7 @@ int main(int argc, char **argv, char **envp) } } else if (*p != '\0') { chs_fail: - fprintf(stderr, "qemu: invalid physical CHS format\n"); + error_report("invalid physical CHS format"); exit(1); } if (hda_opts != NULL) { @@ -3212,7 +3244,7 @@ int main(int argc, char **argv, char **envp) #ifdef CONFIG_CURSES display_type = DT_CURSES; #else - fprintf(stderr, "Curses support is disabled\n"); + error_report("curses support is disabled"); exit(1); #endif break; @@ -3223,8 +3255,7 @@ int main(int argc, char **argv, char **envp) graphic_rotate = strtol(optarg, (char **) &optarg, 10); if (graphic_rotate != 0 && graphic_rotate != 90 && graphic_rotate != 180 && graphic_rotate != 270) { - fprintf(stderr, - "qemu: only 90, 180, 270 deg rotation is available\n"); + error_report("only 90, 180, 270 deg rotation is available"); exit(1); } break; @@ -3375,7 +3406,7 @@ int main(int argc, char **argv, char **envp) w = strtol(p, (char **)&p, 10); if (w <= 0) { graphic_error: - fprintf(stderr, "qemu: invalid resolution or depth\n"); + error_report("invalid resolution or depth"); exit(1); } if (*p != 'x') @@ -3441,7 +3472,7 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_fsdev: olist = qemu_find_opts("fsdev"); if (!olist) { - fprintf(stderr, "fsdev is not supported by this qemu build.\n"); + error_report("fsdev support is disabled"); exit(1); } opts = qemu_opts_parse_noisily(olist, optarg, true); @@ -3456,7 +3487,7 @@ int main(int argc, char **argv, char **envp) olist = qemu_find_opts("virtfs"); if (!olist) { - fprintf(stderr, "virtfs is not supported by this qemu build.\n"); + error_report("virtfs support is disabled"); exit(1); } opts = qemu_opts_parse_noisily(olist, optarg, true); @@ -3466,15 +3497,15 @@ int main(int argc, char **argv, char **envp) if (qemu_opt_get(opts, "fsdriver") == NULL || qemu_opt_get(opts, "mount_tag") == NULL) { - fprintf(stderr, "Usage: -virtfs fsdriver,mount_tag=tag.\n"); + error_report("Usage: -virtfs fsdriver,mount_tag=tag"); exit(1); } fsdev = qemu_opts_create(qemu_find_opts("fsdev"), qemu_opt_get(opts, "mount_tag"), 1, NULL); if (!fsdev) { - fprintf(stderr, "duplicate fsdev id: %s\n", - qemu_opt_get(opts, "mount_tag")); + error_report("duplicate fsdev id: %s", + qemu_opt_get(opts, "mount_tag")); exit(1); } @@ -3483,8 +3514,8 @@ int main(int argc, char **argv, char **envp) #ifdef CONFIG_SYNC_FILE_RANGE qemu_opt_set(fsdev, "writeout", writeout, &error_abort); #else - fprintf(stderr, "writeout=immediate not supported on " - "this platform\n"); + error_report("writeout=immediate not supported " + "on this platform"); exit(1); #endif } @@ -3523,7 +3554,7 @@ int main(int argc, char **argv, char **envp) fsdev = qemu_opts_create(qemu_find_opts("fsdev"), "v_synth", 1, NULL); if (!fsdev) { - fprintf(stderr, "duplicate option: %s\n", "virtfs_synth"); + error_report("duplicate option: %s", "virtfs_synth"); exit(1); } qemu_opt_set(fsdev, "fsdriver", "synth", &error_abort); @@ -3544,15 +3575,14 @@ int main(int argc, char **argv, char **envp) break; case QEMU_OPTION_watchdog: if (watchdog) { - fprintf(stderr, - "qemu: only one watchdog option may be given\n"); + error_report("only one watchdog option may be given"); return 1; } watchdog = optarg; break; case QEMU_OPTION_watchdog_action: if (select_watchdog_action(optarg) == -1) { - fprintf(stderr, "Unknown -watchdog-action parameter\n"); + error_report("unknown -watchdog-action parameter"); exit(1); } break; @@ -3596,7 +3626,7 @@ int main(int argc, char **argv, char **envp) display_type = DT_SDL; break; #else - fprintf(stderr, "SDL support is disabled\n"); + error_report("SDL support is disabled"); exit(1); #endif case QEMU_OPTION_pidfile: @@ -3658,8 +3688,7 @@ int main(int argc, char **argv, char **envp) qemu_opts_parse_noisily(olist, "accel=tcg", false); break; case QEMU_OPTION_no_kvm_pit: { - fprintf(stderr, "Warning: KVM PIT can no longer be disabled " - "separately.\n"); + error_report("warning: ignoring deprecated option"); break; } case QEMU_OPTION_no_kvm_pit_reinjection: { @@ -3672,8 +3701,8 @@ int main(int argc, char **argv, char **envp) { /* end of list */ } }; - fprintf(stderr, "Warning: option deprecated, use " - "lost_tick_policy property of kvm-pit instead.\n"); + error_report("warning: deprecated, replaced by " + "-global kvm-pit.lost_tick_policy=discard"); qdev_prop_register_global_list(kvm_pit_lost_tick_policy); break; } @@ -3708,7 +3737,7 @@ int main(int argc, char **argv, char **envp) exit(1); } #else - fprintf(stderr, "VNC support is disabled\n"); + error_report("VNC support is disabled"); exit(1); #endif break; @@ -3721,7 +3750,7 @@ int main(int argc, char **argv, char **envp) break; case QEMU_OPTION_balloon: if (balloon_parse(optarg) < 0) { - fprintf(stderr, "Unknown -balloon argument %s\n", optarg); + error_report("unknown -balloon argument %s", optarg); exit(1); } break; @@ -3736,15 +3765,14 @@ int main(int argc, char **argv, char **envp) break; case QEMU_OPTION_uuid: if(qemu_uuid_parse(optarg, qemu_uuid) < 0) { - fprintf(stderr, "Fail to parse UUID string." - " Wrong format.\n"); + error_report("failed to parse UUID string: wrong format"); exit(1); } qemu_uuid_set = true; break; case QEMU_OPTION_option_rom: if (nb_option_roms >= MAX_OPTION_ROMS) { - fprintf(stderr, "Too many option ROMs\n"); + error_report("too many option ROMs"); exit(1); } opts = qemu_opts_parse_noisily(qemu_find_opts("option-rom"), @@ -3756,7 +3784,7 @@ int main(int argc, char **argv, char **envp) option_rom[nb_option_roms].bootindex = qemu_opt_get_number(opts, "bootindex", -1); if (!option_rom[nb_option_roms].name) { - fprintf(stderr, "Option ROM file is not specified\n"); + error_report("Option ROM file is not specified"); exit(1); } nb_option_roms++; @@ -3781,9 +3809,8 @@ int main(int argc, char **argv, char **envp) } else if (strcmp("auto", target) == 0) { semihosting.target = SEMIHOSTING_TARGET_AUTO; } else { - fprintf(stderr, "Unsupported semihosting-config" - " %s\n", - optarg); + error_report("unsupported semihosting-config %s", + optarg); exit(1); } } else { @@ -3793,14 +3820,12 @@ int main(int argc, char **argv, char **envp) qemu_opt_foreach(opts, add_semihosting_arg, &semihosting, NULL); } else { - fprintf(stderr, "Unsupported semihosting-config %s\n", - optarg); + error_report("unsupported semihosting-config %s", optarg); exit(1); } break; case QEMU_OPTION_tdf: - fprintf(stderr, "Warning: user space PIT time drift fix " - "is no longer supported.\n"); + error_report("warning: ignoring deprecated option"); break; case QEMU_OPTION_name: opts = qemu_opts_parse_noisily(qemu_find_opts("name"), @@ -3811,7 +3836,7 @@ int main(int argc, char **argv, char **envp) break; case QEMU_OPTION_prom_env: if (nb_prom_envs >= MAX_PROM_ENVS) { - fprintf(stderr, "Too many prom variables\n"); + error_report("too many prom variables"); exit(1); } prom_envs[nb_prom_envs] = optarg; @@ -3894,8 +3919,8 @@ int main(int argc, char **argv, char **envp) { int ret = qemu_read_config_file(optarg); if (ret < 0) { - fprintf(stderr, "read config %s: %s\n", optarg, - strerror(-ret)); + error_report("read config %s: %s", optarg, + strerror(-ret)); exit(1); } break; @@ -3903,7 +3928,7 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_spice: olist = qemu_find_opts("spice"); if (!olist) { - fprintf(stderr, "spice is not supported by this qemu build.\n"); + error_report("spice support is disabled"); exit(1); } opts = qemu_opts_parse_noisily(olist, optarg, false); @@ -3920,7 +3945,8 @@ int main(int argc, char **argv, char **envp) } else { fp = fopen(optarg, "w"); if (fp == NULL) { - fprintf(stderr, "open %s: %s\n", optarg, strerror(errno)); + error_report("open %s: %s", optarg, + strerror(errno)); exit(1); } } @@ -3981,13 +4007,13 @@ int main(int argc, char **argv, char **envp) break; case QEMU_OPTION_dump_vmstate: if (vmstate_dump_file) { - fprintf(stderr, "qemu: only one '-dump-vmstate' " - "option may be given\n"); + error_report("only one '-dump-vmstate' " + "option may be given"); exit(1); } vmstate_dump_file = fopen(optarg, "w"); if (vmstate_dump_file == NULL) { - fprintf(stderr, "open %s: %s\n", optarg, strerror(errno)); + error_report("open %s: %s", optarg, strerror(errno)); exit(1); } break; @@ -3997,6 +4023,8 @@ int main(int argc, char **argv, char **envp) } } + replay_configure(icount_opts); + opts = qemu_get_machine_opts(); optarg = qemu_opt_get(opts, "type"); if (optarg) { @@ -4004,8 +4032,8 @@ int main(int argc, char **argv, char **envp) } if (machine_class == NULL) { - fprintf(stderr, "No machine specified, and there is no default.\n" - "Use -machine help to list supported machines!\n"); + error_report("No machine specified, and there is no default"); + error_printf("Use -machine help to list supported machines\n"); exit(1); } @@ -4052,7 +4080,7 @@ int main(int argc, char **argv, char **envp) cpu_exec_init_all(); if (machine_class->hw_version) { - qemu_set_version(machine_class->hw_version); + qemu_set_hw_version(machine_class->hw_version); } /* Init CPU def lists, based on config @@ -4106,9 +4134,9 @@ int main(int argc, char **argv, char **envp) machine_class->max_cpus = machine_class->max_cpus ?: 1; /* Default to UP */ if (max_cpus > machine_class->max_cpus) { - fprintf(stderr, "Number of SMP CPUs requested (%d) exceeds max CPUs " - "supported by machine '%s' (%d)\n", max_cpus, - machine_class->name, machine_class->max_cpus); + error_report("Number of SMP CPUs requested (%d) exceeds max CPUs " + "supported by machine '%s' (%d)", max_cpus, + machine_class->name, machine_class->max_cpus); exit(1); } @@ -4169,12 +4197,12 @@ int main(int argc, char **argv, char **envp) if (display_type == DT_NOGRAPHIC && (default_parallel || default_serial || default_monitor || default_virtcon)) { - fprintf(stderr, "-nographic can not be used with -daemonize\n"); + error_report("-nographic cannot be used with -daemonize"); exit(1); } #ifdef CONFIG_CURSES if (display_type == DT_CURSES) { - fprintf(stderr, "curses display can not be used with -daemonize\n"); + error_report("curses display cannot be used with -daemonize"); exit(1); } #endif @@ -4233,12 +4261,12 @@ int main(int argc, char **argv, char **envp) } if ((no_frame || alt_grab || ctrl_grab) && display_type != DT_SDL) { - fprintf(stderr, "-no-frame, -alt-grab and -ctrl-grab are only valid " - "for SDL, ignoring option\n"); + error_report("-no-frame, -alt-grab and -ctrl-grab are only valid " + "for SDL, ignoring option"); } if (no_quit && (display_type != DT_GTK && display_type != DT_SDL)) { - fprintf(stderr, "-no-quit is only valid for GTK and SDL, " - "ignoring option\n"); + error_report("-no-quit is only valid for GTK and SDL, " + "ignoring option"); } #if defined(CONFIG_GTK) @@ -4253,13 +4281,14 @@ int main(int argc, char **argv, char **envp) #endif if (request_opengl == 1 && display_opengl == 0) { #if defined(CONFIG_OPENGL) - fprintf(stderr, "OpenGL is not supported by the display.\n"); + error_report("OpenGL is not supported by the display"); #else - fprintf(stderr, "QEMU was built without opengl support.\n"); + error_report("OpenGL support is disabled"); #endif exit(1); } + page_size_init(); socket_init(); if (qemu_opts_foreach(qemu_find_opts("object"), @@ -4281,7 +4310,7 @@ int main(int argc, char **argv, char **envp) #endif if (pid_file && qemu_create_pidfile(pid_file) != 0) { - fprintf(stderr, "Could not acquire pid file: %s\n", strerror(errno)); + error_report("could not acquire pid file: %s", strerror(errno)); exit(1); } @@ -4352,17 +4381,17 @@ int main(int argc, char **argv, char **envp) linux_boot = (kernel_filename != NULL); if (!linux_boot && *kernel_cmdline != '\0') { - fprintf(stderr, "-append only allowed with -kernel option\n"); + error_report("-append only allowed with -kernel option"); exit(1); } if (!linux_boot && initrd_filename != NULL) { - fprintf(stderr, "-initrd only allowed with -kernel option\n"); + error_report("-initrd only allowed with -kernel option"); exit(1); } if (!linux_boot && qemu_opt_get(machine_opts, "dtb")) { - fprintf(stderr, "-dtb only allowed with -kernel option\n"); + error_report("-dtb only allowed with -kernel option"); exit(1); } @@ -4381,7 +4410,7 @@ int main(int argc, char **argv, char **envp) cpu_ticks_init(); if (icount_opts) { if (kvm_enabled() || xen_enabled()) { - fprintf(stderr, "-icount is not allowed with kvm or xen\n"); + error_report("-icount is not allowed with kvm or xen"); exit(1); } configure_icount(icount_opts, &error_abort); @@ -4414,7 +4443,7 @@ int main(int argc, char **argv, char **envp) if (!xen_enabled()) { /* On 32-bit hosts, QEMU is limited by virtual address space */ if (ram_size > (2047 << 20) && HOST_LONG_BITS == 32) { - fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n"); + error_report("at most 2047 MB RAM can be simulated"); exit(1); } } @@ -4430,9 +4459,10 @@ int main(int argc, char **argv, char **envp) } /* open the virtual block devices */ - if (snapshot) - qemu_opts_foreach(qemu_find_opts("drive"), - drive_enable_snapshot, NULL, NULL); + if (snapshot || replay_mode != REPLAY_MODE_NONE) { + qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, + NULL, NULL); + } if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, &machine_class->block_default_type, NULL)) { exit(1); @@ -4487,6 +4517,10 @@ int main(int argc, char **argv, char **envp) } qemu_add_globals(); + /* This checkpoint is required by replay to separate prior clock + reading from the other reads, because timer polling functions query + clock values from the log. */ + replay_checkpoint(CHECKPOINT_INIT); qdev_machine_init(); current_machine->ram_size = ram_size; @@ -4579,7 +4613,7 @@ int main(int argc, char **argv, char **envp) vnc_init_func, NULL, NULL); if (show_vnc_port) { char *ret = vnc_display_local_addr("default"); - printf("VNC server running on `%s'\n", ret); + printf("VNC server running on '%s'\n", ret); g_free(ret); } #endif @@ -4601,10 +4635,16 @@ int main(int argc, char **argv, char **envp) qemu_run_machine_init_done_notifiers(); if (rom_check_and_register_reset() != 0) { - fprintf(stderr, "rom check and register reset failed\n"); + error_report("rom check and register reset failed"); exit(1); } + replay_start(); + + /* This checkpoint is required by replay to separate prior clock + reading from the other reads, because timer polling functions query + clock values from the log. */ + replay_checkpoint(CHECKPOINT_RESET); qemu_system_reset(VMRESET_SILENT); register_global_state(); if (loadvm) { @@ -4642,6 +4682,8 @@ int main(int argc, char **argv, char **envp) } main_loop(); + replay_disable_events(); + bdrv_close_all(); pause_all_vcpus(); res_free(); |