diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-11-19 16:26:08 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-11-19 16:26:08 +0000 |
commit | 348c32709fdbeb475dd072af49523cfdd75873f1 (patch) | |
tree | 7279674d55079522505ee2e91c1f6a180acfe92f | |
parent | c601a244a49f4e0be2539cbc5ffd288727cd4e89 (diff) | |
parent | 1c7ba94a184df1eddd589d5400d879568d3e5d08 (diff) | |
download | qemu-348c32709fdbeb475dd072af49523cfdd75873f1.zip |
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
vhost, pc: fixes for 2.5
Fixes all over the place.
This also re-enables a test we disabled in 2.5 cycle
now that there's a way not to get a warning from it.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# gpg: Signature made Thu 19 Nov 2015 13:27:43 GMT using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>"
* remotes/mst/tags/for_upstream:
exec: silence hugetlbfs warning under qtest
tests: re-enable vhost-user-test
acpi: fix buffer overrun on migration
vhost-user: fix log size
vhost-user: ignore qemu-only features
specs/vhost-user: fix spec to match reality
tests/vhost-user-bridge: implement logging of dirty pages
i440fx: print an error message if user tries to enable iommu
q35: Check propery to determine if iommu is set
vhost-user: start/stop all rings
vhost-user: print original request on error
vhost-user-test: support VHOST_USER_SET_VRING_ENABLE
vhost-user: update spec description
vhost: don't send RESET_OWNER at stop
vhost: let SET_VRING_ENABLE message depends on protocol feature
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rwxr-xr-x | configure | 1 | ||||
-rw-r--r-- | docs/specs/vhost-user.txt | 70 | ||||
-rw-r--r-- | exec.c | 5 | ||||
-rw-r--r-- | hw/acpi/core.c | 8 | ||||
-rw-r--r-- | hw/core/machine.c | 5 | ||||
-rw-r--r-- | hw/net/vhost_net.c | 14 | ||||
-rw-r--r-- | hw/pci-host/piix.c | 5 | ||||
-rw-r--r-- | hw/pci-host/q35.c | 2 | ||||
-rw-r--r-- | hw/virtio/vhost-user.c | 25 | ||||
-rw-r--r-- | include/hw/boards.h | 1 | ||||
-rw-r--r-- | tests/Makefile | 5 | ||||
-rw-r--r-- | tests/vhost-user-bridge.c | 220 | ||||
-rw-r--r-- | tests/vhost-user-test.c | 7 | ||||
-rw-r--r-- | vl.c | 28 |
14 files changed, 311 insertions, 85 deletions
@@ -5681,6 +5681,7 @@ case "$target_name" in echo "CONFIG_KVM=y" >> $config_target_mak if test "$vhost_net" = "yes" ; then echo "CONFIG_VHOST_NET=y" >> $config_target_mak + echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak fi fi esac diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt index 26dde2ec42..7b9cd6d0dd 100644 --- a/docs/specs/vhost-user.txt +++ b/docs/specs/vhost-user.txt @@ -87,6 +87,14 @@ Depending on the request type, payload can be: User address: a 64-bit user address mmap offset: 64-bit offset where region starts in the mapped memory +* Log description + --------------------------- + | log size | log offset | + --------------------------- + log size: size of area used for logging + log offset: offset from start of supplied file descriptor + where logging starts (i.e. where guest address 0 would be logged) + In QEMU the vhost-user message is implemented with the following struct: typedef struct VhostUserMsg { @@ -138,6 +146,29 @@ As older slaves don't support negotiating protocol features, a feature bit was dedicated for this purpose: #define VHOST_USER_F_PROTOCOL_FEATURES 30 +Starting and stopping rings +---------------------- +Client must only process each ring when it is both started and enabled. + +If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is initialized +in an enabled state. + +If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, the ring is initialized +in a disabled state. Client must not process it until ring is enabled by +VHOST_USER_SET_VRING_ENABLE with parameter 1, or after it has been disabled by +VHOST_USER_SET_VRING_ENABLE with parameter 0. + +Each ring is initialized in a stopped state, client must not process it until +ring is started, or after it has been stopped. + +Client must start ring upon receiving a kick (that is, detecting that file +descriptor is readable) on the descriptor specified by +VHOST_USER_SET_VRING_KICK, and stop ring upon receiving +VHOST_USER_GET_VRING_BASE. + +While processing the rings (when they are started and enabled), client must +support changing some configuration aspects on the fly. + Multiple queue support ---------------------- @@ -162,9 +193,13 @@ the slave makes to the memory mapped regions. The client should mark the dirty pages in a log. Once it complies to this logging, it may declare the VHOST_F_LOG_ALL vhost feature. +To start/stop logging of data/used ring writes, server may send messages +VHOST_USER_SET_FEATURES with VHOST_F_LOG_ALL and VHOST_USER_SET_VRING_ADDR with +VHOST_VRING_F_LOG in ring's flags set to 1/0, respectively. + All the modifications to memory pointed by vring "descriptor" should be marked. Modifications to "used" vring should be marked if -VHOST_VRING_F_LOG is part of ring's features. +VHOST_VRING_F_LOG is part of ring's flags. Dirty pages are of size: #define VHOST_LOG_PAGE 0x1000 @@ -173,22 +208,35 @@ The log memory fd is provided in the ancillary data of VHOST_USER_SET_LOG_BASE message when the slave has VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature. -The size of the log may be computed by using all the known guest -addresses. The log covers from address 0 to the maximum of guest +The size of the log is supplied as part of VhostUserMsg +which should be large enough to cover all known guest +addresses. Log starts at the supplied offset in the +supplied file descriptor. +The log covers from address 0 to the maximum of guest regions. In pseudo-code, to mark page at "addr" as dirty: page = addr / VHOST_LOG_PAGE log[page / 8] |= 1 << page % 8 +Where addr is the guest physical address. + Use atomic operations, as the log may be concurrently manipulated. +Note that when logging modifications to the used ring (when VHOST_VRING_F_LOG +is set for this ring), log_guest_addr should be used to calculate the log +offset: the write to first byte of the used ring is logged at this offset from +log start. Also note that this value might be outside the legal guest physical +address range (i.e. does not have to be covered by the VhostUserMemory table), +but the bit offset of the last byte of the ring must fall within +the size supplied by VhostUserLog. + VHOST_USER_SET_LOG_FD is an optional message with an eventfd in ancillary data, it may be used to inform the master that the log has been modified. -Once the source has finished migration, VHOST_USER_RESET_OWNER message -will be sent by the source. No further update must be done before the -destination takes over with new regions & rings. +Once the source has finished migration, rings will be stopped by +the source. No further update must be done before rings are +restarted. Protocol features ----------------- @@ -259,11 +307,13 @@ Message types * VHOST_USER_RESET_OWNER Id: 4 - Equivalent ioctl: VHOST_RESET_OWNER Master payload: N/A - Issued when a new connection is about to be closed. The Master will no - longer own this connection (and will usually close it). + This is no longer used. Used to be sent to request stopping + all rings, but some clients interpreted it to also discard + connection state (this interpretation would lead to bugs). + It is recommended that clients either ignore this message, + or use it to stop all rings. * VHOST_USER_SET_MEM_TABLE @@ -388,6 +438,8 @@ Message types Master payload: vring state description Signal slave to enable or disable corresponding vring. + This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES + has been negotiated. * VHOST_USER_SEND_RARP @@ -51,6 +51,7 @@ #include "qemu/main-loop.h" #include "translate-all.h" #include "sysemu/replay.h" +#include "sysemu/qtest.h" #include "exec/memory-internal.h" #include "exec/ram_addr.h" @@ -1196,8 +1197,10 @@ static long gethugepagesize(const char *path, Error **errp) return 0; } - if (fs.f_type != HUGETLBFS_MAGIC) + if (!qtest_driver() && + fs.f_type != HUGETLBFS_MAGIC) { fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); + } return fs.f_bsize; } diff --git a/hw/acpi/core.c b/hw/acpi/core.c index fe6215af4a..21e113d713 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -625,8 +625,12 @@ void acpi_pm1_cnt_reset(ACPIREGS *ar) void acpi_gpe_init(ACPIREGS *ar, uint8_t len) { ar->gpe.len = len; - ar->gpe.sts = g_malloc0(len / 2); - ar->gpe.en = g_malloc0(len / 2); + /* Only first len / 2 bytes are ever used, + * but the caller in ich9.c migrates full len bytes. + * TODO: fix ich9.c and drop the extra allocation. + */ + ar->gpe.sts = g_malloc0(len); + ar->gpe.en = g_malloc0(len); } void acpi_gpe_reset(ACPIREGS *ar) diff --git a/hw/core/machine.c b/hw/core/machine.c index f4db340468..acca00db22 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -462,11 +462,6 @@ bool machine_usb(MachineState *machine) return machine->usb; } -bool machine_iommu(MachineState *machine) -{ - return machine->iommu; -} - bool machine_kernel_irqchip_allowed(MachineState *machine) { return machine->kernel_irqchip_allowed; diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index d91b7b155e..318c3e6ad2 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -77,14 +77,8 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MRG_RXBUF, - VIRTIO_NET_F_STATUS, - VIRTIO_NET_F_CTRL_VQ, - VIRTIO_NET_F_CTRL_RX, - VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_CTRL_RX_EXTRA, - VIRTIO_NET_F_CTRL_MAC_ADDR, - VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, + /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, @@ -292,12 +286,6 @@ static void vhost_net_stop_one(struct vhost_net *net, int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); assert(r >= 0); } - } else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) { - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = vhost_ops->vhost_reset_device(&net->dev); - assert(r >= 0); - } } if (net->nc->info->poll) { net->nc->info->poll(net->nc, true); diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 7b2fbf9598..715208b22a 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -34,6 +34,7 @@ #include "sysemu/sysemu.h" #include "hw/i386/ioapic.h" #include "qapi/visitor.h" +#include "qemu/error-report.h" /* * I440FX chipset data sheet. @@ -301,6 +302,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) static void i440fx_realize(PCIDevice *dev, Error **errp) { dev->config[I440FX_SMRAM] = 0x02; + + if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { + error_report("warning: i440fx doesn't support emulated iommu"); + } } PCIBus *i440fx_init(const char *host_type, const char *pci_type, diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index c81507d710..1fb470758b 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -506,7 +506,7 @@ static void mch_realize(PCIDevice *d, Error **errp) PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } /* Intel IOMMU (VT-d) */ - if (machine_iommu(current_machine)) { + if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { mch_init_dmar(mch); } } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index c44360219f..1b6c5ac238 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -121,8 +121,8 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { - error_report("Failed to read msg header. Read %d instead of %d.", r, - size); + error_report("Failed to read msg header. Read %d instead of %d." + " Original request %d.", r, size, msg->request); goto fail; } @@ -206,7 +206,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, VhostUserMsg msg = { .request = VHOST_USER_SET_LOG_BASE, .flags = VHOST_USER_VERSION, - .payload.log.mmap_size = log->size, + .payload.log.mmap_size = log->size * sizeof(*(log->log)), .payload.log.mmap_offset = 0, .size = sizeof(msg.payload.log), }; @@ -333,18 +333,23 @@ static int vhost_user_set_vring_base(struct vhost_dev *dev, static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) { - struct vhost_vring_state state = { - .index = dev->vq_index, - .num = enable, - }; + int i; - if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { + if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { return -1; } - return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); -} + for (i = 0; i < dev->nvqs; ++i) { + struct vhost_vring_state state = { + .index = dev->vq_index + i, + .num = enable, + }; + + vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); + } + return 0; +} static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) diff --git a/include/hw/boards.h b/include/hw/boards.h index 3e9a92c055..24eb6f0e77 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -33,7 +33,6 @@ MachineClass *find_default_machine(void); extern MachineState *current_machine; bool machine_usb(MachineState *machine); -bool machine_iommu(MachineState *machine); bool machine_kernel_irqchip_allowed(MachineState *machine); bool machine_kernel_irqchip_required(MachineState *machine); int machine_kvm_shadow_mem(MachineState *machine); diff --git a/tests/Makefile b/tests/Makefile index 90c4141ac5..b9379841d8 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -197,8 +197,9 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c check-qtest-i386-y += tests/pc-cpu-test$(EXESUF) check-qtest-i386-y += tests/q35-test$(EXESUF) gcov-files-i386-y += hw/pci-host/q35.c -ifeq ($(CONFIG_VHOST_NET),y) -check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF) +check-qtest-i386-$(CONFIG_VHOST_NET_TEST_i386) += tests/vhost-user-test$(EXESUF) +ifeq ($(CONFIG_VHOST_NET_TEST_i386),) +check-qtest-x86_64-$(CONFIG_VHOST_NET_TEST_x86_64) += tests/vhost-user-test$(EXESUF) endif check-qtest-i386-y += tests/test-netfilter$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c index 864f69e738..7bdfc98615 100644 --- a/tests/vhost-user-bridge.c +++ b/tests/vhost-user-bridge.c @@ -13,16 +13,22 @@ /* * TODO: * - main should get parameters from the command line. - * - implement all request handlers. + * - implement all request handlers. Still not implemented: + * vubr_get_queue_num_exec() + * vubr_send_rarp_exec() * - test for broken requests and virtqueue. * - implement features defined by Virtio 1.0 spec. * - support mergeable buffers and indirect descriptors. - * - implement RESET_DEVICE request. * - implement clean shutdown. * - implement non-blocking writes to UDP backend. * - implement polling strategy. + * - implement clean starting/stopping of vq processing + * - implement clean starting/stopping of used and buffers + * dirty page logging. */ +#define _FILE_OFFSET_BITS 64 + #include <stddef.h> #include <assert.h> #include <stdio.h> @@ -166,6 +172,8 @@ typedef struct VubrVirtq { struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; + uint64_t log_guest_addr; + int enable; } VubrVirtq; /* Based on qemu/hw/virtio/vhost-user.c */ @@ -173,6 +181,8 @@ typedef struct VubrVirtq { #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_LOG_PAGE 4096 + enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_MQ = 0, VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, @@ -220,6 +230,11 @@ typedef struct VhostUserMemory { VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + typedef struct VhostUserMsg { VhostUserRequest request; @@ -234,6 +249,7 @@ typedef struct VhostUserMsg { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserLog log; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; int fd_num; @@ -265,8 +281,13 @@ typedef struct VubrDev { uint32_t nregions; VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; VubrVirtq vq[MAX_NR_VIRTQUEUE]; + int log_call_fd; + uint64_t log_size; + uint8_t *log_table; int backend_udp_sock; struct sockaddr_in backend_udp_dest; + int ready; + uint64_t features; } VubrDev; static const char *vubr_request_str[] = { @@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) rc = recvmsg(conn_fd, &msg, 0); - if (rc <= 0) { + if (rc == 0) { + vubr_die("recvmsg"); + fprintf(stderr, "Peer disconnected.\n"); + exit(1); + } + if (rc < 0) { vubr_die("recvmsg"); } @@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) if (vmsg->size) { rc = read(conn_fd, &vmsg->payload, vmsg->size); - if (rc <= 0) { + if (rc == 0) { + vubr_die("recvmsg"); + fprintf(stderr, "Peer disconnected.\n"); + exit(1); + } + if (rc < 0) { vubr_die("recvmsg"); } @@ -455,6 +486,16 @@ vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len) vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen); } +/* Kick the log_call_fd if required. */ +static void +vubr_log_kick(VubrDev *dev) +{ + if (dev->log_call_fd != -1) { + DPRINT("Kicking the QEMU's log...\n"); + eventfd_write(dev->log_call_fd, 1); + } +} + /* Kick the guest if necessary. */ static void vubr_virtqueue_kick(VubrVirtq *vq) @@ -466,11 +507,39 @@ vubr_virtqueue_kick(VubrVirtq *vq) } static void +vubr_log_page(uint8_t *log_table, uint64_t page) +{ + DPRINT("Logged dirty guest page: %"PRId64"\n", page); + atomic_or(&log_table[page / 8], 1 << (page % 8)); +} + +static void +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) +{ + uint64_t page; + + if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) || + !dev->log_table || !length) { + return; + } + + assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8)); + + page = address / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < address + length) { + vubr_log_page(dev->log_table, page); + page += VHOST_LOG_PAGE; + } + vubr_log_kick(dev); +} + +static void vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) { - struct vring_desc *desc = vq->desc; + struct vring_desc *desc = vq->desc; struct vring_avail *avail = vq->avail; - struct vring_used *used = vq->used; + struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; unsigned int size = vq->size; @@ -510,6 +579,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) if (len <= chunk_len) { memcpy(chunk_start, buf, len); + vubr_log_write(dev, desc[i].addr, len); } else { fprintf(stderr, "Received too long packet from the backend. Dropping...\n"); @@ -519,11 +589,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) /* Add descriptor to the used ring. */ used->ring[u_index].id = d_index; used->ring[u_index].len = len; + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, ring[u_index]), + sizeof(used->ring[u_index])); vq->last_avail_index++; vq->last_used_index++; atomic_mb_set(&used->idx, vq->last_used_index); + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, idx), + sizeof(used->idx)); /* Kick the guest if necessary. */ vubr_virtqueue_kick(vq); @@ -532,9 +608,10 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) static int vubr_process_desc(VubrDev *dev, VubrVirtq *vq) { - struct vring_desc *desc = vq->desc; + struct vring_desc *desc = vq->desc; struct vring_avail *avail = vq->avail; - struct vring_used *used = vq->used; + struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; unsigned int size = vq->size; @@ -552,6 +629,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq) void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); uint32_t chunk_len = desc[i].len; + assert(!(desc[i].flags & VRING_DESC_F_WRITE)); + if (len + chunk_len < buf_size) { memcpy(buf + len, chunk_start, chunk_len); DPRINT("%d ", chunk_len); @@ -577,6 +656,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq) /* Add descriptor to the used ring. */ used->ring[u_index].id = d_index; used->ring[u_index].len = len; + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, ring[u_index]), + sizeof(used->ring[u_index])); vubr_consume_raw_packet(dev, buf, len); @@ -588,6 +670,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq) { struct vring_avail *avail = vq->avail; struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; while (vq->last_avail_index != atomic_mb_read(&avail->idx)) { vubr_process_desc(dev, vq); @@ -596,6 +679,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq) } atomic_mb_set(&used->idx, vq->last_used_index); + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, idx), + sizeof(used->idx)); } static void @@ -609,6 +695,10 @@ vubr_backend_recv_cb(int sock, void *ctx) int buflen = sizeof(buf); int len; + if (!dev->ready) { + return; + } + DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); @@ -656,14 +746,14 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { vmsg->payload.u64 = ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_NET_F_CTRL_VQ) | - (1ULL << VIRTIO_NET_F_CTRL_RX) | - (1ULL << VHOST_F_LOG_ALL)); + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); + vmsg->size = sizeof(vmsg->payload.u64); DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - /* reply */ + /* Reply */ return 1; } @@ -671,6 +761,7 @@ static int vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + dev->features = vmsg->payload.u64; return 0; } @@ -680,10 +771,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg) return 0; } +static void +vubr_close_log(VubrDev *dev) +{ + if (dev->log_table) { + if (munmap(dev->log_table, dev->log_size) != 0) { + vubr_die("munmap()"); + } + + dev->log_table = 0; + } + if (dev->log_call_fd != -1) { + close(dev->log_call_fd); + dev->log_call_fd = -1; + } +} + static int vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); + vubr_close_log(dev); + dev->ready = 0; + dev->features = 0; return 0; } @@ -710,9 +819,9 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) DPRINT(" mmap_offset 0x%016"PRIx64"\n", msg_region->mmap_offset); - dev_region->gpa = msg_region->guest_phys_addr; - dev_region->size = msg_region->memory_size; - dev_region->qva = msg_region->userspace_addr; + dev_region->gpa = msg_region->guest_phys_addr; + dev_region->size = msg_region->memory_size; + dev_region->qva = msg_region->userspace_addr; dev_region->mmap_offset = msg_region->mmap_offset; /* We don't use offset argument of mmap() since the @@ -736,14 +845,38 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); - return 0; + int fd; + uint64_t log_mmap_size, log_mmap_offset; + void *rc; + + assert(vmsg->fd_num == 1); + fd = vmsg->fds[0]; + + assert(vmsg->size == sizeof(vmsg->payload.log)); + log_mmap_offset = vmsg->payload.log.mmap_offset; + log_mmap_size = vmsg->payload.log.mmap_size; + DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset); + DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size); + + rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, + log_mmap_offset); + if (rc == MAP_FAILED) { + vubr_die("mmap"); + } + dev->log_table = rc; + dev->log_size = log_mmap_size; + + vmsg->size = sizeof(vmsg->payload.u64); + /* Reply */ + return 1; } static int vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); + assert(vmsg->fd_num == 1); + dev->log_call_fd = vmsg->fds[0]; + DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]); return 0; } @@ -777,6 +910,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg) vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr); vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr); vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr); + vq->log_guest_addr = vra->log_guest_addr; DPRINT("Setting virtq addresses:\n"); DPRINT(" vring_desc at %p\n", vq->desc); @@ -803,8 +937,18 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); - return 0; + unsigned int index = vmsg->payload.state.index; + + DPRINT("State.index: %d\n", index); + vmsg->payload.state.num = dev->vq[index].last_avail_index; + vmsg->size = sizeof(vmsg->payload.state); + /* FIXME: this is a work-around for a bug in QEMU enabling + * too early vrings. When protocol features are enabled, + * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */ + dev->ready = 0; + + /* Reply */ + return 1; } static int @@ -829,7 +973,17 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg) DPRINT("Waiting for kicks on fd: %d for vq: %d\n", dev->vq[index].kick_fd, index); } + /* We temporarily use this hack to determine that both TX and RX + * queues are set up and ready for processing. + * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and + * actual kicks. */ + if (dev->vq[0].kick_fd != -1 && + dev->vq[1].kick_fd != -1) { + dev->ready = 1; + DPRINT("vhost-user-bridge is ready for processing queues.\n"); + } return 0; + } static int @@ -858,9 +1012,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { - /* FIXME: unimplented */ + vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD; DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - return 0; + vmsg->size = sizeof(vmsg->payload.u64); + + /* Reply */ + return 1; } static int @@ -881,7 +1038,12 @@ vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); + unsigned int index = vmsg->payload.state.index; + unsigned int enable = vmsg->payload.state.num; + + DPRINT("State.index: %d\n", index); + DPRINT("State.enable: %d\n", enable); + dev->vq[index].enable = enable; return 0; } @@ -987,7 +1149,7 @@ vubr_accept_cb(int sock, void *ctx) socklen_t len = sizeof(un); conn_fd = accept(sock, (struct sockaddr *) &un, &len); - if (conn_fd == -1) { + if (conn_fd == -1) { vubr_die("accept()"); } DPRINT("Got connection from remote peer on sock %d\n", conn_fd); @@ -1009,9 +1171,17 @@ vubr_new(const char *path) .size = 0, .last_avail_index = 0, .last_used_index = 0, .desc = 0, .avail = 0, .used = 0, + .enable = 0, }; } + /* Init log */ + dev->log_call_fd = -1; + dev->log_size = 0; + dev->log_table = 0; + dev->ready = 0; + dev->features = 0; + /* Get a UNIX socket. */ dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); if (dev->sock == -1) { diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 01cfc7e25d..022223b2a7 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -70,6 +70,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ERR = 14, VHOST_USER_GET_PROTOCOL_FEATURES = 15, VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_MAX } VhostUserRequest; @@ -315,8 +316,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) g_cond_signal(&s->data_cond); break; - case VHOST_USER_RESET_OWNER: - s->fds_num = 0; + case VHOST_USER_SET_VRING_ENABLE: + if (!msg.payload.state.num) { + s->fds_num = 0; + } break; default: @@ -4288,14 +4288,23 @@ int main(int argc, char **argv, char **envp) page_size_init(); socket_init(); - if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + if (qemu_opts_foreach(qemu_find_opts("chardev"), + chardev_init_func, NULL, NULL)) { exit(1); } - if (qemu_opts_foreach(qemu_find_opts("chardev"), - chardev_init_func, NULL, NULL)) { + if (qtest_chrdev) { + Error *local_err = NULL; + qtest_init(qtest_chrdev, qtest_log, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + } + + if (qemu_opts_foreach(qemu_find_opts("object"), + object_create, + object_create_initial, NULL)) { exit(1); } @@ -4325,15 +4334,6 @@ int main(int argc, char **argv, char **envp) configure_accelerator(current_machine); - if (qtest_chrdev) { - Error *local_err = NULL; - qtest_init(qtest_chrdev, qtest_log, &local_err); - if (local_err) { - error_report_err(local_err); - exit(1); - } - } - machine_opts = qemu_get_machine_opts(); kernel_filename = qemu_opt_get(machine_opts, "kernel"); initrd_filename = qemu_opt_get(machine_opts, "initrd"); |