summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2015-11-19 16:26:08 +0000
committerPeter Maydell <peter.maydell@linaro.org>2015-11-19 16:26:08 +0000
commit348c32709fdbeb475dd072af49523cfdd75873f1 (patch)
tree7279674d55079522505ee2e91c1f6a180acfe92f
parentc601a244a49f4e0be2539cbc5ffd288727cd4e89 (diff)
parent1c7ba94a184df1eddd589d5400d879568d3e5d08 (diff)
downloadqemu-348c32709fdbeb475dd072af49523cfdd75873f1.zip
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
vhost, pc: fixes for 2.5 Fixes all over the place. This also re-enables a test we disabled in 2.5 cycle now that there's a way not to get a warning from it. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Thu 19 Nov 2015 13:27:43 GMT using RSA key ID D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" * remotes/mst/tags/for_upstream: exec: silence hugetlbfs warning under qtest tests: re-enable vhost-user-test acpi: fix buffer overrun on migration vhost-user: fix log size vhost-user: ignore qemu-only features specs/vhost-user: fix spec to match reality tests/vhost-user-bridge: implement logging of dirty pages i440fx: print an error message if user tries to enable iommu q35: Check propery to determine if iommu is set vhost-user: start/stop all rings vhost-user: print original request on error vhost-user-test: support VHOST_USER_SET_VRING_ENABLE vhost-user: update spec description vhost: don't send RESET_OWNER at stop vhost: let SET_VRING_ENABLE message depends on protocol feature Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rwxr-xr-xconfigure1
-rw-r--r--docs/specs/vhost-user.txt70
-rw-r--r--exec.c5
-rw-r--r--hw/acpi/core.c8
-rw-r--r--hw/core/machine.c5
-rw-r--r--hw/net/vhost_net.c14
-rw-r--r--hw/pci-host/piix.c5
-rw-r--r--hw/pci-host/q35.c2
-rw-r--r--hw/virtio/vhost-user.c25
-rw-r--r--include/hw/boards.h1
-rw-r--r--tests/Makefile5
-rw-r--r--tests/vhost-user-bridge.c220
-rw-r--r--tests/vhost-user-test.c7
-rw-r--r--vl.c28
14 files changed, 311 insertions, 85 deletions
diff --git a/configure b/configure
index 0a4c78a743..71d6cbcfa7 100755
--- a/configure
+++ b/configure
@@ -5681,6 +5681,7 @@ case "$target_name" in
echo "CONFIG_KVM=y" >> $config_target_mak
if test "$vhost_net" = "yes" ; then
echo "CONFIG_VHOST_NET=y" >> $config_target_mak
+ echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak
fi
fi
esac
diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
index 26dde2ec42..7b9cd6d0dd 100644
--- a/docs/specs/vhost-user.txt
+++ b/docs/specs/vhost-user.txt
@@ -87,6 +87,14 @@ Depending on the request type, payload can be:
User address: a 64-bit user address
mmap offset: 64-bit offset where region starts in the mapped memory
+* Log description
+ ---------------------------
+ | log size | log offset |
+ ---------------------------
+ log size: size of area used for logging
+ log offset: offset from start of supplied file descriptor
+ where logging starts (i.e. where guest address 0 would be logged)
+
In QEMU the vhost-user message is implemented with the following struct:
typedef struct VhostUserMsg {
@@ -138,6 +146,29 @@ As older slaves don't support negotiating protocol features,
a feature bit was dedicated for this purpose:
#define VHOST_USER_F_PROTOCOL_FEATURES 30
+Starting and stopping rings
+---------------------------
+Client must only process each ring when it is both started and enabled.
+
+If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is initialized
+in an enabled state.
+
+If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, the ring is initialized
+in a disabled state. Client must not process it until ring is enabled by
+VHOST_USER_SET_VRING_ENABLE with parameter 1, or after it has been disabled by
+VHOST_USER_SET_VRING_ENABLE with parameter 0.
+
+Each ring is initialized in a stopped state. The client must not process it
+until the ring is started, or after it has been stopped.
+
+Client must start ring upon receiving a kick (that is, detecting that file
+descriptor is readable) on the descriptor specified by
+VHOST_USER_SET_VRING_KICK, and stop ring upon receiving
+VHOST_USER_GET_VRING_BASE.
+
+While processing the rings (when they are started and enabled), client must
+support changing some configuration aspects on the fly.
+
Multiple queue support
----------------------
@@ -162,9 +193,13 @@ the slave makes to the memory mapped regions. The client should mark
the dirty pages in a log. Once it complies to this logging, it may
declare the VHOST_F_LOG_ALL vhost feature.
+To start/stop logging of data/used ring writes, server may send messages
+VHOST_USER_SET_FEATURES with VHOST_F_LOG_ALL and VHOST_USER_SET_VRING_ADDR with
+VHOST_VRING_F_LOG in ring's flags set to 1/0, respectively.
+
All the modifications to memory pointed by vring "descriptor" should
be marked. Modifications to "used" vring should be marked if
-VHOST_VRING_F_LOG is part of ring's features.
+VHOST_VRING_F_LOG is part of ring's flags.
Dirty pages are of size:
#define VHOST_LOG_PAGE 0x1000
@@ -173,22 +208,35 @@ The log memory fd is provided in the ancillary data of
VHOST_USER_SET_LOG_BASE message when the slave has
VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature.
-The size of the log may be computed by using all the known guest
-addresses. The log covers from address 0 to the maximum of guest
+The size of the log is supplied as part of VhostUserMsg
+and should be large enough to cover all known guest
+addresses. The log starts at the supplied offset in the
+supplied file descriptor.
+The log covers from address 0 to the maximum of guest
regions. In pseudo-code, to mark page at "addr" as dirty:
page = addr / VHOST_LOG_PAGE
log[page / 8] |= 1 << page % 8
+Where addr is the guest physical address.
+
Use atomic operations, as the log may be concurrently manipulated.
+Note that when logging modifications to the used ring (when VHOST_VRING_F_LOG
+is set for this ring), log_guest_addr should be used to calculate the log
+offset: the write to the first byte of the used ring is logged at this offset from
+log start. Also note that this value might be outside the legal guest physical
+address range (i.e. does not have to be covered by the VhostUserMemory table),
+but the bit offset of the last byte of the ring must fall within
+the size supplied by VhostUserLog.
+
VHOST_USER_SET_LOG_FD is an optional message with an eventfd in
ancillary data, it may be used to inform the master that the log has
been modified.
-Once the source has finished migration, VHOST_USER_RESET_OWNER message
-will be sent by the source. No further update must be done before the
-destination takes over with new regions & rings.
+Once the source has finished migration, rings will be stopped by
+the source. No further update must be done before rings are
+restarted.
Protocol features
-----------------
@@ -259,11 +307,13 @@ Message types
* VHOST_USER_RESET_OWNER
Id: 4
- Equivalent ioctl: VHOST_RESET_OWNER
Master payload: N/A
- Issued when a new connection is about to be closed. The Master will no
- longer own this connection (and will usually close it).
+ This is no longer used. Used to be sent to request stopping
+ all rings, but some clients interpreted it to also discard
+ connection state (this interpretation would lead to bugs).
+ It is recommended that clients either ignore this message,
+ or use it to stop all rings.
* VHOST_USER_SET_MEM_TABLE
@@ -388,6 +438,8 @@ Message types
Master payload: vring state description
Signal slave to enable or disable corresponding vring.
+ This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES
+ has been negotiated.
* VHOST_USER_SEND_RARP
diff --git a/exec.c b/exec.c
index b09f18b2a4..acbd4a2cb5 100644
--- a/exec.c
+++ b/exec.c
@@ -51,6 +51,7 @@
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"
+#include "sysemu/qtest.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
@@ -1196,8 +1197,10 @@ static long gethugepagesize(const char *path, Error **errp)
return 0;
}
- if (fs.f_type != HUGETLBFS_MAGIC)
+ if (!qtest_driver() &&
+ fs.f_type != HUGETLBFS_MAGIC) {
fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+ }
return fs.f_bsize;
}
diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index fe6215af4a..21e113d713 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -625,8 +625,12 @@ void acpi_pm1_cnt_reset(ACPIREGS *ar)
void acpi_gpe_init(ACPIREGS *ar, uint8_t len)
{
ar->gpe.len = len;
- ar->gpe.sts = g_malloc0(len / 2);
- ar->gpe.en = g_malloc0(len / 2);
+ /* Only first len / 2 bytes are ever used,
+ * but the caller in ich9.c migrates full len bytes.
+ * TODO: fix ich9.c and drop the extra allocation.
+ */
+ ar->gpe.sts = g_malloc0(len);
+ ar->gpe.en = g_malloc0(len);
}
void acpi_gpe_reset(ACPIREGS *ar)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index f4db340468..acca00db22 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -462,11 +462,6 @@ bool machine_usb(MachineState *machine)
return machine->usb;
}
-bool machine_iommu(MachineState *machine)
-{
- return machine->iommu;
-}
-
bool machine_kernel_irqchip_allowed(MachineState *machine)
{
return machine->kernel_irqchip_allowed;
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d91b7b155e..318c3e6ad2 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -77,14 +77,8 @@ static const int user_feature_bits[] = {
VIRTIO_NET_F_HOST_ECN,
VIRTIO_NET_F_HOST_UFO,
VIRTIO_NET_F_MRG_RXBUF,
- VIRTIO_NET_F_STATUS,
- VIRTIO_NET_F_CTRL_VQ,
- VIRTIO_NET_F_CTRL_RX,
- VIRTIO_NET_F_CTRL_VLAN,
- VIRTIO_NET_F_CTRL_RX_EXTRA,
- VIRTIO_NET_F_CTRL_MAC_ADDR,
- VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
+ /* This bit implies RARP isn't sent by QEMU out of band */
VIRTIO_NET_F_GUEST_ANNOUNCE,
VIRTIO_NET_F_MQ,
@@ -292,12 +286,6 @@ static void vhost_net_stop_one(struct vhost_net *net,
int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
assert(r >= 0);
}
- } else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) {
- for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
- const VhostOps *vhost_ops = net->dev.vhost_ops;
- int r = vhost_ops->vhost_reset_device(&net->dev);
- assert(r >= 0);
- }
}
if (net->nc->info->poll) {
net->nc->info->poll(net->nc, true);
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 7b2fbf9598..715208b22a 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -34,6 +34,7 @@
#include "sysemu/sysemu.h"
#include "hw/i386/ioapic.h"
#include "qapi/visitor.h"
+#include "qemu/error-report.h"
/*
* I440FX chipset data sheet.
@@ -301,6 +302,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp)
+/* Realize function for the i440fx PCI device.
+ * Sets the I440FX_SMRAM config byte to 0x02 and, when the machine's "iommu"
+ * property reads as true, reports that i440fx has no emulated iommu.
+ * errp is never written here, so the report does not make realize fail. */
static void i440fx_realize(PCIDevice *dev, Error **errp)
{
dev->config[I440FX_SMRAM] = 0x02;
+
+ if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) {
+ error_report("warning: i440fx doesn't support emulated iommu");
+ }
}
PCIBus *i440fx_init(const char *host_type, const char *pci_type,
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index c81507d710..1fb470758b 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -506,7 +506,7 @@ static void mch_realize(PCIDevice *d, Error **errp)
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
}
/* Intel IOMMU (VT-d) */
- if (machine_iommu(current_machine)) {
+ if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) {
mch_init_dmar(mch);
}
}
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index c44360219f..1b6c5ac238 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -121,8 +121,8 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
r = qemu_chr_fe_read_all(chr, p, size);
if (r != size) {
- error_report("Failed to read msg header. Read %d instead of %d.", r,
- size);
+ error_report("Failed to read msg header. Read %d instead of %d."
+ " Original request %d.", r, size, msg->request);
goto fail;
}
@@ -206,7 +206,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
VhostUserMsg msg = {
.request = VHOST_USER_SET_LOG_BASE,
.flags = VHOST_USER_VERSION,
- .payload.log.mmap_size = log->size,
+ .payload.log.mmap_size = log->size * sizeof(*(log->log)),
.payload.log.mmap_offset = 0,
.size = sizeof(msg.payload.log),
};
@@ -333,18 +333,23 @@ static int vhost_user_set_vring_base(struct vhost_dev *dev,
static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
- struct vhost_vring_state state = {
- .index = dev->vq_index,
- .num = enable,
- };
+ int i;
- if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) {
+ if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
return -1;
}
- return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
-}
+ for (i = 0; i < dev->nvqs; ++i) {
+ struct vhost_vring_state state = {
+ .index = dev->vq_index + i,
+ .num = enable,
+ };
+
+ vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
+ }
+ return 0;
+}
static int vhost_user_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 3e9a92c055..24eb6f0e77 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -33,7 +33,6 @@ MachineClass *find_default_machine(void);
extern MachineState *current_machine;
bool machine_usb(MachineState *machine);
-bool machine_iommu(MachineState *machine);
bool machine_kernel_irqchip_allowed(MachineState *machine);
bool machine_kernel_irqchip_required(MachineState *machine);
int machine_kvm_shadow_mem(MachineState *machine);
diff --git a/tests/Makefile b/tests/Makefile
index 90c4141ac5..b9379841d8 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -197,8 +197,9 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c
check-qtest-i386-y += tests/pc-cpu-test$(EXESUF)
check-qtest-i386-y += tests/q35-test$(EXESUF)
gcov-files-i386-y += hw/pci-host/q35.c
-ifeq ($(CONFIG_VHOST_NET),y)
-check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF)
+check-qtest-i386-$(CONFIG_VHOST_NET_TEST_i386) += tests/vhost-user-test$(EXESUF)
+ifeq ($(CONFIG_VHOST_NET_TEST_i386),)
+check-qtest-x86_64-$(CONFIG_VHOST_NET_TEST_x86_64) += tests/vhost-user-test$(EXESUF)
endif
check-qtest-i386-y += tests/test-netfilter$(EXESUF)
check-qtest-x86_64-y = $(check-qtest-i386-y)
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 864f69e738..7bdfc98615 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -13,16 +13,22 @@
/*
* TODO:
* - main should get parameters from the command line.
- * - implement all request handlers.
+ * - implement all request handlers. Still not implemented:
+ * vubr_get_queue_num_exec()
+ * vubr_send_rarp_exec()
* - test for broken requests and virtqueue.
* - implement features defined by Virtio 1.0 spec.
* - support mergeable buffers and indirect descriptors.
- * - implement RESET_DEVICE request.
* - implement clean shutdown.
* - implement non-blocking writes to UDP backend.
* - implement polling strategy.
+ * - implement clean starting/stopping of vq processing
+ * - implement clean starting/stopping of used ring and buffer
+ * dirty page logging.
*/
+#define _FILE_OFFSET_BITS 64
+
#include <stddef.h>
#include <assert.h>
#include <stdio.h>
@@ -166,6 +172,8 @@ typedef struct VubrVirtq {
struct vring_desc *desc;
struct vring_avail *avail;
struct vring_used *used;
+ uint64_t log_guest_addr;
+ int enable;
} VubrVirtq;
/* Based on qemu/hw/virtio/vhost-user.c */
@@ -173,6 +181,8 @@ typedef struct VubrVirtq {
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_LOG_PAGE 4096
+
enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
+/* Message payload describing the dirty-page log area: its size and where
+ * it starts inside the file descriptor passed with the message. */
+typedef struct VhostUserLog {
+ uint64_t mmap_size; /* size of the area used for logging */
+ uint64_t mmap_offset; /* offset of the log from the start of the fd */
+} VhostUserLog;
+
typedef struct VhostUserMsg {
VhostUserRequest request;
@@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
+ VhostUserLog log;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int fd_num;
@@ -265,8 +281,13 @@ typedef struct VubrDev {
uint32_t nregions;
VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
VubrVirtq vq[MAX_NR_VIRTQUEUE];
+ int log_call_fd;
+ uint64_t log_size;
+ uint8_t *log_table;
int backend_udp_sock;
struct sockaddr_in backend_udp_dest;
+ int ready;
+ uint64_t features;
} VubrDev;
static const char *vubr_request_str[] = {
@@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
rc = recvmsg(conn_fd, &msg, 0);
- if (rc <= 0) {
+ if (rc == 0) {
+ vubr_die("recvmsg");
+ fprintf(stderr, "Peer disconnected.\n");
+ exit(1);
+ }
+ if (rc < 0) {
vubr_die("recvmsg");
}
@@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
if (vmsg->size) {
rc = read(conn_fd, &vmsg->payload, vmsg->size);
- if (rc <= 0) {
+ if (rc == 0) {
+ vubr_die("recvmsg");
+ fprintf(stderr, "Peer disconnected.\n");
+ exit(1);
+ }
+ if (rc < 0) {
vubr_die("recvmsg");
}
@@ -455,6 +486,16 @@ vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
}
+/* Kick the log_call_fd if required.
+ * Writes 1 to dev->log_call_fd when it is set (i.e. not -1); otherwise does
+ * nothing. The result of eventfd_write() is not checked. */
+static void
+vubr_log_kick(VubrDev *dev)
+{
+ if (dev->log_call_fd != -1) {
+ DPRINT("Kicking the QEMU's log...\n");
+ eventfd_write(dev->log_call_fd, 1);
+ }
+}
+
/* Kick the guest if necessary. */
static void
vubr_virtqueue_kick(VubrVirtq *vq)
@@ -466,11 +507,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
}
+/* Mark page number "page" as dirty in the log bitmap: bit (page % 8) of
+ * byte (page / 8) is set with an atomic OR. "page" is a page index, not a
+ * byte address. No bounds check against the log size is done here. */
static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+ DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+ atomic_or(&log_table[page / 8], 1 << (page % 8));
+}
+
+/*
+ * Mark every log page touched by a write of "length" bytes starting at
+ * "address" as dirty, then kick the log eventfd.
+ *
+ * dev:     device holding the log table, its size and the feature bits
+ * address: address of the first byte written, in the space the log covers
+ *          (used-ring updates pass an address derived from log_guest_addr)
+ * length:  number of bytes written; a zero length is ignored
+ *
+ * Does nothing unless VHOST_F_LOG_ALL is set in dev->features and a log
+ * table is mapped.
+ */
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+    uint64_t page;
+
+    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
+        !dev->log_table || !length) {
+        return;
+    }
+
+    /* The byte holding the bit of the last written page must lie inside
+     * the mapped log. */
+    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
+
+    page = address / VHOST_LOG_PAGE;
+    while (page * VHOST_LOG_PAGE < address + length) {
+        vubr_log_page(dev->log_table, page);
+        /* "page" is a page number, not a byte address: advance by one
+         * page so that no page inside the written range is skipped. */
+        page += 1;
+    }
+    vubr_log_kick(dev);
+}
+
+static void
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
{
- struct vring_desc *desc = vq->desc;
+ struct vring_desc *desc = vq->desc;
struct vring_avail *avail = vq->avail;
- struct vring_used *used = vq->used;
+ struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
unsigned int size = vq->size;
@@ -510,6 +579,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
if (len <= chunk_len) {
memcpy(chunk_start, buf, len);
+ vubr_log_write(dev, desc[i].addr, len);
} else {
fprintf(stderr,
"Received too long packet from the backend. Dropping...\n");
@@ -519,11 +589,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+ sizeof(used->ring[u_index]));
vq->last_avail_index++;
vq->last_used_index++;
atomic_mb_set(&used->idx, vq->last_used_index);
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, idx),
+ sizeof(used->idx));
/* Kick the guest if necessary. */
vubr_virtqueue_kick(vq);
@@ -532,9 +608,10 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
static int
vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
{
- struct vring_desc *desc = vq->desc;
+ struct vring_desc *desc = vq->desc;
struct vring_avail *avail = vq->avail;
- struct vring_used *used = vq->used;
+ struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
unsigned int size = vq->size;
@@ -552,6 +629,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
uint32_t chunk_len = desc[i].len;
+ assert(!(desc[i].flags & VRING_DESC_F_WRITE));
+
if (len + chunk_len < buf_size) {
memcpy(buf + len, chunk_start, chunk_len);
DPRINT("%d ", chunk_len);
@@ -577,6 +656,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+ sizeof(used->ring[u_index]));
vubr_consume_raw_packet(dev, buf, len);
@@ -588,6 +670,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
{
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
vubr_process_desc(dev, vq);
@@ -596,6 +679,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
}
atomic_mb_set(&used->idx, vq->last_used_index);
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, idx),
+ sizeof(used->idx));
}
static void
@@ -609,6 +695,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
int buflen = sizeof(buf);
int len;
+ if (!dev->ready) {
+ return;
+ }
+
DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
@@ -656,14 +746,14 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
vmsg->payload.u64 =
((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_NET_F_CTRL_VQ) |
- (1ULL << VIRTIO_NET_F_CTRL_RX) |
- (1ULL << VHOST_F_LOG_ALL));
+ (1ULL << VHOST_F_LOG_ALL) |
+ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
+
vmsg->size = sizeof(vmsg->payload.u64);
DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- /* reply */
+ /* Reply */
return 1;
}
@@ -671,6 +761,7 @@ static int
vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+ dev->features = vmsg->payload.u64;
return 0;
}
@@ -680,10 +771,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
return 0;
}
+/* Release the logging resources held by dev.
+ * If a log table is mapped it is unmapped with the recorded log_size
+ * (vubr_die() is called when munmap() fails) and the pointer is cleared.
+ * If a log eventfd is set it is closed and reset to -1.
+ * dev->log_size itself is left unchanged. */
+static void
+vubr_close_log(VubrDev *dev)
+{
+ if (dev->log_table) {
+ if (munmap(dev->log_table, dev->log_size) != 0) {
+ vubr_die("munmap()");
+ }
+
+ dev->log_table = 0;
+ }
+ if (dev->log_call_fd != -1) {
+ close(dev->log_call_fd);
+ dev->log_call_fd = -1;
+ }
+}
+
static int
vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
+ vubr_close_log(dev);
+ dev->ready = 0;
+ dev->features = 0;
return 0;
}
@@ -710,9 +819,9 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
DPRINT(" mmap_offset 0x%016"PRIx64"\n",
msg_region->mmap_offset);
- dev_region->gpa = msg_region->guest_phys_addr;
- dev_region->size = msg_region->memory_size;
- dev_region->qva = msg_region->userspace_addr;
+ dev_region->gpa = msg_region->guest_phys_addr;
+ dev_region->size = msg_region->memory_size;
+ dev_region->qva = msg_region->userspace_addr;
dev_region->mmap_offset = msg_region->mmap_offset;
/* We don't use offset argument of mmap() since the
@@ -736,14 +845,38 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
+ int fd;
+ uint64_t log_mmap_size, log_mmap_offset;
+ void *rc;
+
+ assert(vmsg->fd_num == 1);
+ fd = vmsg->fds[0];
+
+ assert(vmsg->size == sizeof(vmsg->payload.log));
+ log_mmap_offset = vmsg->payload.log.mmap_offset;
+ log_mmap_size = vmsg->payload.log.mmap_size;
+ DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+ DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
+
+ rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ log_mmap_offset);
+ if (rc == MAP_FAILED) {
+ vubr_die("mmap");
+ }
+ dev->log_table = rc;
+ dev->log_size = log_mmap_size;
+
+ vmsg->size = sizeof(vmsg->payload.u64);
+ /* Reply */
+ return 1;
}
static int
vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
+ assert(vmsg->fd_num == 1);
+ dev->log_call_fd = vmsg->fds[0];
+ DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
return 0;
}
@@ -777,6 +910,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+ vq->log_guest_addr = vra->log_guest_addr;
DPRINT("Setting virtq addresses:\n");
DPRINT(" vring_desc at %p\n", vq->desc);
@@ -803,8 +937,18 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
+ unsigned int index = vmsg->payload.state.index;
+
+ DPRINT("State.index: %d\n", index);
+ vmsg->payload.state.num = dev->vq[index].last_avail_index;
+ vmsg->size = sizeof(vmsg->payload.state);
+ /* FIXME: this is a work-around for a bug in QEMU enabling
+ * vrings too early. When protocol features are enabled,
+ * we have to respect the VHOST_USER_SET_VRING_ENABLE request. */
+ dev->ready = 0;
+
+ /* Reply */
+ return 1;
}
static int
@@ -829,7 +973,17 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
dev->vq[index].kick_fd, index);
}
+ /* We temporarily use this hack to determine that both TX and RX
+ * queues are set up and ready for processing.
+ * FIXME: we need to rely on VHOST_USER_SET_VRING_ENABLE and
+ * actual kicks. */
+ if (dev->vq[0].kick_fd != -1 &&
+ dev->vq[1].kick_fd != -1) {
+ dev->ready = 1;
+ DPRINT("vhost-user-bridge is ready for processing queues.\n");
+ }
return 0;
+
}
static int
@@ -858,9 +1012,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- /* FIXME: unimplented */
+ vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- return 0;
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ /* Reply */
+ return 1;
}
static int
@@ -881,7 +1038,12 @@ vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int enable = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.enable: %d\n", enable);
+ dev->vq[index].enable = enable;
return 0;
}
@@ -987,7 +1149,7 @@ vubr_accept_cb(int sock, void *ctx)
socklen_t len = sizeof(un);
conn_fd = accept(sock, (struct sockaddr *) &un, &len);
- if (conn_fd == -1) {
+ if (conn_fd == -1) {
vubr_die("accept()");
}
DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
@@ -1009,9 +1171,17 @@ vubr_new(const char *path)
.size = 0,
.last_avail_index = 0, .last_used_index = 0,
.desc = 0, .avail = 0, .used = 0,
+ .enable = 0,
};
}
+ /* Init log */
+ dev->log_call_fd = -1;
+ dev->log_size = 0;
+ dev->log_table = 0;
+ dev->ready = 0;
+ dev->features = 0;
+
/* Get a UNIX socket. */
dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
if (dev->sock == -1) {
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 01cfc7e25d..022223b2a7 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -70,6 +70,7 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_MAX
} VhostUserRequest;
@@ -315,8 +316,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
g_cond_signal(&s->data_cond);
break;
- case VHOST_USER_RESET_OWNER:
- s->fds_num = 0;
+ case VHOST_USER_SET_VRING_ENABLE:
+ if (!msg.payload.state.num) {
+ s->fds_num = 0;
+ }
break;
default:
diff --git a/vl.c b/vl.c
index 7d993a5243..f9c661a4d0 100644
--- a/vl.c
+++ b/vl.c
@@ -4288,14 +4288,23 @@ int main(int argc, char **argv, char **envp)
page_size_init();
socket_init();
- if (qemu_opts_foreach(qemu_find_opts("object"),
- object_create,
- object_create_initial, NULL)) {
+ if (qemu_opts_foreach(qemu_find_opts("chardev"),
+ chardev_init_func, NULL, NULL)) {
exit(1);
}
- if (qemu_opts_foreach(qemu_find_opts("chardev"),
- chardev_init_func, NULL, NULL)) {
+ if (qtest_chrdev) {
+ Error *local_err = NULL;
+ qtest_init(qtest_chrdev, qtest_log, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ exit(1);
+ }
+ }
+
+ if (qemu_opts_foreach(qemu_find_opts("object"),
+ object_create,
+ object_create_initial, NULL)) {
exit(1);
}
@@ -4325,15 +4334,6 @@ int main(int argc, char **argv, char **envp)
configure_accelerator(current_machine);
- if (qtest_chrdev) {
- Error *local_err = NULL;
- qtest_init(qtest_chrdev, qtest_log, &local_err);
- if (local_err) {
- error_report_err(local_err);
- exit(1);
- }
- }
-
machine_opts = qemu_get_machine_opts();
kernel_filename = qemu_opt_get(machine_opts, "kernel");
initrd_filename = qemu_opt_get(machine_opts, "initrd");