summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2017-03-02 17:39:12 +0000
committerPeter Maydell <peter.maydell@linaro.org>2017-03-02 17:39:12 +0000
commit251501a3714096f807778f6d3f03711dcdb9ce29 (patch)
treefd80eb80a4ea2eb24f07411aa8eb8b294aec4213
parentc9fc677a35e6e00cdf00c7d085cbd74e0b90b2e6 (diff)
parent665414ad06aa1bc92e615db9641e58fb13d07de1 (diff)
downloadqemu-251501a3714096f807778f6d3f03711dcdb9ce29.zip
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170228a' into staging
Migration pull Note: The 'postcopy: Update userfaultfd.h header' is part of Paolo's header update and will disappear if applied after it. Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> # gpg: Signature made Tue 28 Feb 2017 12:38:34 GMT # gpg: using RSA key 0x0516331EBC5BFDE7 # gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7 * remotes/dgilbert/tags/pull-migration-20170228a: (27 commits) postcopy: Add extra check for COPY function postcopy: Add doc about hugepages and postcopy postcopy: Check for userfault+hugepage feature postcopy: Update userfaultfd.h header postcopy: Allow hugepages postcopy: Send whole huge pages postcopy: Mask fault addresses to huge page boundary postcopy: Load huge pages in one go postcopy: Use temporary for placing zero huge pages postcopy: Plumb pagesize down into place helpers postcopy: Record largest page size postcopy: enhance ram_block_discard_range for hugepages exec: ram_block_discard_range postcopy: Chunk discards for hugepages postcopy: Transmit and compare individual page sizes postcopy: Transmit ram size summary word migration: fix use-after-free of to_dst_file migration: Update docs to discourage version bumps migration: fix id leak regression migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--docs/migration.txt71
-rw-r--r--exec.c83
-rw-r--r--hw/core/qdev.c7
-rw-r--r--hw/usb/bus.c19
-rw-r--r--include/exec/cpu-common.h2
-rw-r--r--include/migration/migration.h6
-rw-r--r--include/migration/postcopy-ram.h13
-rw-r--r--include/migration/vmstate.h4
-rw-r--r--migration/migration.c36
-rw-r--r--migration/postcopy-ram.c142
-rw-r--r--migration/ram.c109
-rw-r--r--migration/savevm.c38
-rw-r--r--migration/trace-events2
-rw-r--r--migration/vmstate.c97
-rw-r--r--qdev-monitor.c9
-rwxr-xr-xscripts/vmstate-static-checker.py5
-rw-r--r--stubs/vmstate.c6
-rw-r--r--tests/test-vmstate.c122
18 files changed, 550 insertions, 221 deletions
diff --git a/docs/migration.txt b/docs/migration.txt
index 6503c17685..1b940a829b 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -161,6 +161,11 @@ include/hw/hw.h.
=== More about versions ===
+Version numbers are intended for major incompatible changes to the
+migration of a device, and using them breaks backwards-migration
+compatibility; in general most changes can be made by adding Subsections
+(see below) or _TEST macros (see below) which won't break compatibility.
+
You can see that there are several version fields:
- version_id: the maximum version_id supported by VMState for that device.
@@ -175,6 +180,9 @@ version_id. And the function load_state_old() (if present) is able to
load state from minimum_version_id_old to minimum_version_id. This
function is deprecated and will be removed when no more users are left.
+Saving state will always create a section with the 'version_id' value
+and thus can't be loaded by any older QEMU.
+
=== Massaging functions ===
Sometimes, it is not enough to be able to save the state directly
@@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation
not enabled, the values on that fields are garbage and don't need to
be sent.
+Using a condition function that checks a 'property' to determine whether
+to send a subsection allows backwards migration compatibility when
+new subsections are added.
+
+For example;
+ a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and
+ default it to true.
+ b) Add an entry to the HW_COMPAT_ for the previous version
+ that sets the property to false.
+ c) Add a static bool support_foo function that tests the property.
+ d) Add a subsection with a .needed set to the support_foo function
+ e) (potentially) Add a pre_load that sets up a default value for 'foo'
+ to be used if the subsection isn't loaded.
+
+Now that subsection will not be generated when using an older
+machine type and the migration stream will be accepted by older
+QEMU versions. pre-load functions can be used to initialise state
+on the newer version so that they default to suitable values
+when loading streams created by older QEMU versions that do not
+generate the subsection.
+
+In some cases subsections are added for data that had been accidentally
+omitted by earlier versions; if the missing data causes the migration
+process to succeed but the guest to behave badly then it may be better
+to send the subsection and cause the migration to explicitly fail
+with the unknown subsection error. If the bad behaviour only happens
+with certain data values, making the subsection conditional on
+the data value (rather than the machine type) allows migrations to succeed
+in most cases. In general the preference is to tie the subsection to
+the machine type, and allow reliable migrations, unless the behaviour
+from omission of the subsection is really bad.
+
+= Not sending existing elements =
+
+Sometimes members of the VMState are no longer needed;
+ removing them will break migration compatibility
+ making them version dependent and bumping the version will break backwards
+ migration compatibility.
+
+The best way is to:
+ a) Add a new property/compatibility/function in the same way for subsections
+ above.
+ b) replace the VMSTATE macro with the _TEST version of the macro, e.g.:
+ VMSTATE_UINT32(foo, barstruct)
+ becomes
+ VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz)
+
+ Sometime in the future when we no longer care about the ancient
+versions these can be killed off.
+
= Return path =
In most migration scenarios there is only a single data path that runs
@@ -482,3 +540,16 @@ request for a page that has already been sent is ignored. Duplicate requests
such as this can happen as a page is sent at about the same time the
destination accesses it.
+=== Postcopy with hugepages ===
+
+Postcopy now works with hugetlbfs backed memory:
+ a) The linux kernel on the destination must support userfault on hugepages.
+ b) The huge-page configuration on the source and destination VMs must be
+ identical; i.e. RAMBlocks on both sides must use the same page size.
+ c) Note that -mem-path /dev/hugepages will fall back to allocating normal
+ RAM if it doesn't have enough hugepages, triggering (b) to fail.
+ Using -mem-prealloc enforces the allocation using hugepages.
+ d) Care should be taken with the size of hugepage used; postcopy with 2MB
+ hugepages works well, however 1GB hugepages are likely to be problematic
+ since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link,
+ and until the full page is transferred the destination thread is blocked.
diff --git a/exec.c b/exec.c
index 3adf2b1861..785d20f648 100644
--- a/exec.c
+++ b/exec.c
@@ -45,6 +45,12 @@
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
@@ -1518,6 +1524,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb)
return rb->page_size;
}
+/* Returns the largest size of page in use */
+size_t qemu_ram_pagesize_largest(void)
+{
+ RAMBlock *block;
+ size_t largest = 0;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ largest = MAX(largest, qemu_ram_pagesize(block));
+ }
+
+ return largest;
+}
+
static int memory_try_enable_merging(void *addr, size_t len)
{
if (!machine_mem_merge(current_machine)) {
@@ -3294,4 +3313,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
rcu_read_unlock();
return ret;
}
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0, b) Trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, none-0 on failure
+ *
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+ int ret = -1;
+
+ uint8_t *host_startaddr = rb->host + start;
+
+ if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned start address: %p",
+ host_startaddr);
+ goto err;
+ }
+
+ if ((start + length) <= rb->used_length) {
+ uint8_t *host_endaddr = host_startaddr + length;
+ if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned end address: %p",
+ host_endaddr);
+ goto err;
+ }
+
+ errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+ if (rb->page_size == qemu_host_page_size) {
+#if defined(CONFIG_MADVISE)
+ /* Note: We need the madvise MADV_DONTNEED behaviour of definitely
+ * freeing the page.
+ */
+ ret = madvise(host_startaddr, length, MADV_DONTNEED);
+#endif
+ } else {
+ /* Huge page case - unfortunately it can't do DONTNEED, but
+ * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
+ * huge page file.
+ */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ start, length);
+#endif
+ }
+ if (ret) {
+ ret = -errno;
+ error_report("ram_block_discard_range: Failed to discard range "
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ }
+ } else {
+ error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+ "/%zx/" RAM_ADDR_FMT")",
+ rb->idstr, start, length, rb->used_length);
+ }
+
+err:
+ return ret;
+}
+
#endif
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 923e626333..1e7fb33246 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -37,6 +37,7 @@
#include "hw/boards.h"
#include "hw/sysbus.h"
#include "qapi-event.h"
+#include "migration/migration.h"
int qdev_hotplug = 0;
static bool qdev_hot_added = false;
@@ -903,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
Error *local_err = NULL;
bool unattached_parent = false;
static int unattached_count;
+ int ret;
if (dev->hotplugged && !dc->hotpluggable) {
error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj));
@@ -910,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
}
if (value && !dev->realized) {
+ ret = check_migratable(obj, &local_err);
+ if (ret < 0) {
+ goto fail;
+ }
+
if (!obj->parent) {
gchar *name = g_strdup_printf("device[%d]", unattached_count++);
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index efe4b8e1a6..24f1608b4b 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -8,7 +8,6 @@
#include "monitor/monitor.h"
#include "trace.h"
#include "qemu/cutils.h"
-#include "migration/migration.h"
static void usb_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent);
@@ -688,8 +687,6 @@ USBDevice *usbdevice_create(const char *cmdline)
const char *params;
int len;
USBDevice *dev;
- ObjectClass *klass;
- DeviceClass *dc;
params = strchr(cmdline,':');
if (params) {
@@ -724,22 +721,6 @@ USBDevice *usbdevice_create(const char *cmdline)
return NULL;
}
- klass = object_class_by_name(f->name);
- if (klass == NULL) {
- error_report("Device '%s' not found", f->name);
- return NULL;
- }
-
- dc = DEVICE_CLASS(klass);
-
- if (only_migratable) {
- if (dc->vmsd->unmigratable) {
- error_report("Device %s is not migratable, but --only-migratable "
- "was specified", f->name);
- return NULL;
- }
- }
-
if (f->usbdevice_init) {
dev = f->usbdevice_init(bus, params);
} else {
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index bd15853e51..8c305aa4fa 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -64,6 +64,7 @@ void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
void qemu_ram_unset_idstr(RAMBlock *block);
const char *qemu_ram_get_idstr(RAMBlock *rb);
size_t qemu_ram_pagesize(RAMBlock *block);
+size_t qemu_ram_pagesize_largest(void);
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
int len, int is_write);
@@ -105,6 +106,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
ram_addr_t offset, ram_addr_t length, void *opaque);
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
#endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1735d66512..5720c884f4 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -22,6 +22,7 @@
#include "qapi-types.h"
#include "exec/cpu-common.h"
#include "qemu/coroutine_int.h"
+#include "qom/object.h"
#define QEMU_VM_FILE_MAGIC 0x5145564d
#define QEMU_VM_FILE_VERSION_COMPAT 0x00000002
@@ -92,6 +93,7 @@ struct MigrationIncomingState {
*/
QemuEvent main_thread_load_event;
+ size_t largest_page_size;
bool have_fault_thread;
QemuThread fault_thread;
QemuSemaphore fault_thread_sem;
@@ -107,6 +109,7 @@ struct MigrationIncomingState {
QEMUFile *to_src_file;
QemuMutex rp_mutex; /* We send replies from multiple threads */
void *postcopy_tmp_page;
+ void *postcopy_tmp_zero_page;
QEMUBH *bh;
@@ -313,6 +316,8 @@ int migrate_add_blocker(Error *reason, Error **errp);
*/
void migrate_del_blocker(Error *reason);
+int check_migratable(Object *obj, Error **err);
+
bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
@@ -375,6 +380,7 @@ void global_state_store_running(void);
void flush_page_queue(MigrationState *ms);
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
ram_addr_t start, ram_addr_t len);
+uint64_t ram_pagesize_summary(void);
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index b6a7491f2d..8e036b95a2 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -35,13 +35,6 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages);
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);
/*
- * Discard the contents of 'length' bytes from 'start'
- * We can assume that if we've been called postcopy_ram_hosttest returned true
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length);
-
-/*
* Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
* however leaving it until after precopy means that most of the precopy
* data is still THPd
@@ -81,13 +74,15 @@ void postcopy_discard_send_finish(MigrationState *ms,
* to use other postcopy_ routines to allocate.
* returns 0 on success
*/
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from);
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize);
/*
* Place a zero page at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host);
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize);
/*
* Allocate a page of memory that can be mapped at a later point in time
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 63e7b02e05..f2dbf8410a 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -253,6 +253,10 @@ extern const VMStateInfo vmstate_info_uint16;
extern const VMStateInfo vmstate_info_uint32;
extern const VMStateInfo vmstate_info_uint64;
+/** Put this in the stream when migrating a null pointer.*/
+#define VMS_NULLPTR_MARKER (0x30U) /* '0' */
+extern const VMStateInfo vmstate_info_nullptr;
+
extern const VMStateInfo vmstate_info_float64;
extern const VMStateInfo vmstate_info_cpudouble;
diff --git a/migration/migration.c b/migration/migration.c
index c6ae69d371..3dab6845b1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -49,6 +49,10 @@
* for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300
+/* Maximum migrate downtime set to 2000 seconds */
+#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
+#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
+
/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
@@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque)
int ret;
mis->from_src_file = f;
+ mis->largest_page_size = qemu_ram_pagesize_largest();
postcopy_state_set(POSTCOPY_INCOMING_NONE);
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_ACTIVE);
@@ -843,10 +848,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
return;
}
if (params->has_downtime_limit &&
- (params->downtime_limit < 0 || params->downtime_limit > 2000000)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "downtime_limit",
- "an integer in the range of 0 to 2000000 milliseconds");
+ (params->downtime_limit < 0 ||
+ params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
+ error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
+ "the range of 0 to %d milliseconds",
+ MAX_MIGRATE_DOWNTIME);
return;
}
if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
@@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason)
migration_blockers = g_slist_remove(migration_blockers, reason);
}
+int check_migratable(Object *obj, Error **err)
+{
+ DeviceClass *dc = DEVICE_GET_CLASS(obj);
+ if (only_migratable && dc->vmsd) {
+ if (dc->vmsd->unmigratable) {
+ error_setg(err, "Device %s is not migratable, but "
+ "--only-migratable was specified",
+ object_get_typename(obj));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
void qmp_migrate_incoming(const char *uri, Error **errp)
{
Error *local_err = NULL;
@@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp)
void qmp_migrate_set_downtime(double value, Error **errp)
{
+ if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
+ error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
+ "the range of 0 to %d seconds",
+ MAX_MIGRATE_DOWNTIME_SECONDS);
+ return;
+ }
+
value *= 1000; /* Convert to milliseconds */
value = MAX(0, MIN(INT64_MAX, value));
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a40dddbaf6..effbeb64fb 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd)
return false;
}
- return true;
-}
-
-/*
- * Check for things that postcopy won't support; returns 0 if the block
- * is fine.
- */
-static int check_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
-{
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
-
- if (qemu_ram_pagesize(rb) > getpagesize()) {
- error_report("Postcopy doesn't support large page sizes yet (%s)",
- block_name);
- return -E2BIG;
+ if (getpagesize() != ram_pagesize_summary()) {
+ bool have_hp = false;
+ /* We've got a huge page */
+#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
+ have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
+#endif
+ if (!have_hp) {
+ error_report("Userfault on this host does not support huge pages");
+ return false;
+ }
}
-
- return 0;
+ return true;
}
/*
@@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void)
goto out;
}
- /* Check for anything about the RAMBlocks we don't support */
- if (qemu_ram_foreach_block(check_range, NULL)) {
- /* check_range will have printed its own error */
- goto out;
- }
-
ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
if (ufd == -1) {
error_report("%s: userfaultfd not available: %s", __func__,
@@ -200,27 +187,6 @@ out:
return ret;
}
-/**
- * postcopy_ram_discard_range: Discard a range of memory.
- * We can assume that if we've been called postcopy_ram_hosttest returned true.
- *
- * @mis: Current incoming migration state.
- * @start, @length: range of memory to discard.
- *
- * returns: 0 on success.
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length)
-{
- trace_postcopy_ram_discard_range(start, length);
- if (madvise(start, length, MADV_DONTNEED)) {
- error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
/*
* Setup an area of RAM so that it *can* be used for postcopy later; this
* must be done right at the start prior to pre-copy.
@@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr,
* - we're going to get the copy from the source anyway.
* (Precopy will just overwrite this data, so doesn't need the discard)
*/
- if (postcopy_ram_discard_range(mis, host_addr, length)) {
+ if (ram_discard_range(mis, block_name, 0, length)) {
return -1;
}
@@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
if (mis->postcopy_tmp_page) {
- munmap(mis->postcopy_tmp_page, getpagesize());
+ munmap(mis->postcopy_tmp_page, mis->largest_page_size);
mis->postcopy_tmp_page = NULL;
}
+ if (mis->postcopy_tmp_zero_page) {
+ munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
+ mis->postcopy_tmp_zero_page = NULL;
+ }
trace_postcopy_ram_incoming_cleanup_exit();
return 0;
}
@@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
error_report("%s userfault register: %s", __func__, strerror(errno));
return -1;
}
+ if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
+ error_report("%s userfault: Region doesn't support COPY", __func__);
+ return -1;
+ }
return 0;
}
@@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
MigrationIncomingState *mis = opaque;
struct uffd_msg msg;
int ret;
- size_t hostpagesize = getpagesize();
RAMBlock *rb = NULL;
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
@@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
break;
}
- rb_offset &= ~(hostpagesize - 1);
+ rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
qemu_ram_get_idstr(rb),
rb_offset);
@@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque)
if (rb != last_rb) {
last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
- rb_offset, hostpagesize);
+ rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
- rb_offset, hostpagesize);
+ rb_offset, qemu_ram_pagesize(rb));
}
}
trace_postcopy_ram_fault_thread_exit();
@@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
* Place a host page (from) at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize)
{
struct uffdio_copy copy_struct;
copy_struct.dst = (uint64_t)(uintptr_t)host;
copy_struct.src = (uint64_t)(uintptr_t)from;
- copy_struct.len = getpagesize();
+ copy_struct.len = pagesize;
copy_struct.mode = 0;
/* copy also acks to the kernel waking the stalled thread up
@@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
*/
if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
int e = errno;
- error_report("%s: %s copy host: %p from: %p",
- __func__, strerror(e), host, from);
+ error_report("%s: %s copy host: %p from: %p (size: %zd)",
+ __func__, strerror(e), host, from, pagesize);
return -e;
}
@@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
* Place a zero page at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize)
{
- struct uffdio_zeropage zero_struct;
+ trace_postcopy_place_page_zero(host);
- zero_struct.range.start = (uint64_t)(uintptr_t)host;
- zero_struct.range.len = getpagesize();
- zero_struct.mode = 0;
+ if (pagesize == getpagesize()) {
+ struct uffdio_zeropage zero_struct;
+ zero_struct.range.start = (uint64_t)(uintptr_t)host;
+ zero_struct.range.len = getpagesize();
+ zero_struct.mode = 0;
- if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
- int e = errno;
- error_report("%s: %s zero host: %p",
- __func__, strerror(e), host);
+ if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+ int e = errno;
+ error_report("%s: %s zero host: %p",
+ __func__, strerror(e), host);
- return -e;
+ return -e;
+ }
+ } else {
+ /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
+ if (!mis->postcopy_tmp_zero_page) {
+ mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+ int e = errno;
+ mis->postcopy_tmp_zero_page = NULL;
+ error_report("%s: %s mapping large zero page",
+ __func__, strerror(e));
+ return -e;
+ }
+ memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+ }
+ return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
+ pagesize);
}
- trace_postcopy_place_page_zero(host);
return 0;
}
@@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
if (!mis->postcopy_tmp_page) {
- mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
+ mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
PROT_READ | PROT_WRITE, MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0);
if (mis->postcopy_tmp_page == MAP_FAILED) {
@@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
return -1;
}
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length)
-{
- assert(0);
- return -1;
-}
-
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
assert(0);
@@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
return -1;
}
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize)
{
assert(0);
return -1;
}
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize)
{
assert(0);
return -1;
diff --git a/migration/ram.c b/migration/ram.c
index f289fcddd5..719425b9b8 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void)
iterations_prev = 0;
}
+/* Returns a summary bitmap of the page sizes of all RAMBlocks;
+ * for VMs with just normal pages this is equivalent to the
+ * host page size. If it's got some huge pages then it's the OR
+ * of all the different page sizes.
+ */
+uint64_t ram_pagesize_summary(void)
+{
+ RAMBlock *block;
+ uint64_t summary = 0;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ summary |= block->page_size;
+ }
+
+ return summary;
+}
+
static void migration_bitmap_sync(void)
{
RAMBlock *block;
@@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
* offset to point into the middle of a host page
* in which case the remainder of the hostpage is sent.
* Only dirty target pages are sent.
+ * Note that the host page size may be a huge page for this
+ * block.
*
* Returns: Number of pages written.
*
@@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
ram_addr_t dirty_ram_abs)
{
int tmppages, pages = 0;
+ size_t pagesize = qemu_ram_pagesize(pss->block);
+
do {
tmppages = ram_save_target_page(ms, f, pss, last_stage,
bytes_transferred, dirty_ram_abs);
@@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
pages += tmppages;
pss->offset += TARGET_PAGE_SIZE;
dirty_ram_abs += TARGET_PAGE_SIZE;
- } while (pss->offset & (qemu_host_page_size - 1));
+ } while (pss->offset & (pagesize - 1));
/* The offset we leave with is the last one we looked at */
pss->offset -= TARGET_PAGE_SIZE;
@@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
{
unsigned long *bitmap;
unsigned long *unsentmap;
- unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
+ unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
unsigned long first = block->offset >> TARGET_PAGE_BITS;
unsigned long len = block->used_length >> TARGET_PAGE_BITS;
unsigned long last = first + (len - 1);
unsigned long run_start;
+ if (block->page_size == TARGET_PAGE_SIZE) {
+ /* Easy case - TPS==HPS for a non-huge page RAMBlock */
+ return;
+ }
+
bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
@@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
* Utility for the outgoing postcopy code.
*
* Discard any partially sent host-page size chunks, mark any partially
- * dirty host-page size chunks as all dirty.
+ * dirty host-page size chunks as all dirty. In this case the host-page
+ * is the host-page for the particular RAMBlock, i.e. it might be a huge page
*
* Returns: 0 on success
*/
@@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms)
{
struct RAMBlock *block;
- if (qemu_host_page_size == TARGET_PAGE_SIZE) {
- /* Easy case - TPS==HPS - nothing to be done */
- return 0;
- }
-
/* Easiest way to make sure we don't resume in the middle of a host-page */
last_seen_block = NULL;
last_sent_block = NULL;
@@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
return -EINVAL;
}
- /* Deal with TPS != HPS */
+ /* Deal with TPS != HPS and huge pages */
ret = postcopy_chunk_hostpages(ms);
if (ret) {
rcu_read_unlock();
@@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis,
{
int ret = -1;
+ trace_ram_discard_range(block_name, start, length);
+
rcu_read_lock();
RAMBlock *rb = qemu_ram_block_by_name(block_name);
@@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis,
goto err;
}
- uint8_t *host_startaddr = rb->host + start;
-
- if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
- error_report("ram_discard_range: Unaligned start address: %p",
- host_startaddr);
- goto err;
- }
-
- if ((start + length) <= rb->used_length) {
- uint8_t *host_endaddr = host_startaddr + length;
- if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
- error_report("ram_discard_range: Unaligned end address: %p",
- host_endaddr);
- goto err;
- }
- ret = postcopy_ram_discard_range(mis, host_startaddr, length);
- } else {
- error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
- "/%zx/" RAM_ADDR_FMT")",
- block_name, start, length, rb->used_length);
- }
+ ret = ram_block_discard_range(rb, start, length);
err:
rcu_read_unlock();
@@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
qemu_put_byte(f, strlen(block->idstr));
qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
qemu_put_be64(f, block->used_length);
+ if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
+ qemu_put_be64(f, block->page_size);
+ }
}
rcu_read_unlock();
@@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f)
{
int flags = 0, ret = 0;
bool place_needed = false;
- bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
+ bool matching_page_sizes = false;
MigrationIncomingState *mis = migration_incoming_get_current();
/* Temporary page that is later 'placed' */
void *postcopy_host_page = postcopy_get_tmp_page(mis);
@@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f)
void *host = NULL;
void *page_buffer = NULL;
void *place_source = NULL;
+ RAMBlock *block = NULL;
uint8_t ch;
addr = qemu_get_be64(f);
@@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f)
trace_ram_load_postcopy_loop((uint64_t)addr, flags);
place_needed = false;
if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
- RAMBlock *block = ram_block_from_stream(f, flags);
+ block = ram_block_from_stream(f, flags);
host = host_from_ram_block_offset(block, addr);
if (!host) {
@@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f)
ret = -EINVAL;
break;
}
+ matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
/*
- * Postcopy requires that we place whole host pages atomically.
+ * Postcopy requires that we place whole host pages atomically;
+ * these may be huge pages for RAMBlocks that are backed by
+ * hugetlbfs.
* To make it atomic, the data is read into a temporary page
* that's moved into place later.
* The migration protocol uses, possibly smaller, target-pages
@@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f)
* of a host page in order.
*/
page_buffer = postcopy_host_page +
- ((uintptr_t)host & ~qemu_host_page_mask);
+ ((uintptr_t)host & (block->page_size - 1));
/* If all TP are zero then we can optimise the place */
- if (!((uintptr_t)host & ~qemu_host_page_mask)) {
+ if (!((uintptr_t)host & (block->page_size - 1))) {
all_zero = true;
} else {
/* not the 1st TP within the HP */
@@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f)
* page
*/
place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
- ~qemu_host_page_mask) == 0;
+ (block->page_size - 1)) == 0;
place_source = postcopy_host_page;
}
last_host = host;
@@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f)
if (place_needed) {
/* This gets called at the last target page in the host page */
+ void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
+
if (all_zero) {
- ret = postcopy_place_page_zero(mis,
- host + TARGET_PAGE_SIZE -
- qemu_host_page_size);
+ ret = postcopy_place_page_zero(mis, place_dest,
+ block->page_size);
} else {
- ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
- qemu_host_page_size,
- place_source);
+ ret = postcopy_place_page(mis, place_dest,
+ place_source, block->page_size);
}
}
if (!ret) {
@@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
* be atomic
*/
bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
+ /* ADVISE is earlier, it shows the source has the postcopy capability on */
+ bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
seq_iter++;
@@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
error_report_err(local_err);
}
}
+ /* For postcopy we need to check hugepage sizes match */
+ if (postcopy_advised &&
+ block->page_size != qemu_host_page_size) {
+ uint64_t remote_page_size = qemu_get_be64(f);
+ if (remote_page_size != block->page_size) {
+ error_report("Mismatched RAM page size %s "
+ "(local) %zd != %" PRId64,
+ id, block->page_size,
+ remote_page_size);
+ ret = -EINVAL;
+ }
+ }
ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
block->idstr);
} else {
diff --git a/migration/savevm.c b/migration/savevm.c
index 5ecd264134..3b19a4a274 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
return -1;
}
+ g_free(id);
se->compat = g_new0(CompatEntry, 1);
pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
@@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
uint64_t tmp[2];
- tmp[0] = cpu_to_be64(getpagesize());
+ tmp[0] = cpu_to_be64(ram_pagesize_summary());
tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
trace_qemu_savevm_send_postcopy_advise();
@@ -1276,6 +1277,11 @@ done:
status = MIGRATION_STATUS_COMPLETED;
}
migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
+
+ /* f is outer parameter, it should not stay in global migration state after
+ * this function finished */
+ ms->to_dst_file = NULL;
+
return ret;
}
@@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
{
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
- uint64_t remote_hps, remote_tps;
+ uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
trace_loadvm_postcopy_handle_advise();
if (ps != POSTCOPY_INCOMING_NONE) {
@@ -1359,17 +1365,27 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
return -1;
}
- remote_hps = qemu_get_be64(mis->from_src_file);
- if (remote_hps != getpagesize()) {
+ remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
+ local_pagesize_summary = ram_pagesize_summary();
+
+ if (remote_pagesize_summary != local_pagesize_summary) {
/*
- * Some combinations of mismatch are probably possible but it gets
- * a bit more complicated. In particular we need to place whole
- * host pages on the dest at once, and we need to ensure that we
- * handle dirtying to make sure we never end up sending part of
- * a hostpage on it's own.
+ * This detects two potential causes of mismatch:
+ * a) A mismatch in host page sizes
+ * Some combinations of mismatch are probably possible but it gets
+ * a bit more complicated. In particular we need to place whole
+ * host pages on the dest at once, and we need to ensure that we
+ * handle dirtying to make sure we never end up sending part of
+ * a hostpage on it's own.
+ * b) The use of different huge page sizes on source/destination
+ * a more fine grain test is performed during RAM block migration
+ * but this test here causes a nice early clear failure, and
+ * also fails when passed to an older qemu that doesn't
+ * do huge pages.
*/
- error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
- (int)remote_hps, getpagesize());
+ error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
+ " d=%" PRIx64 ")",
+ remote_pagesize_summary, local_pagesize_summary);
return -1;
}
diff --git a/migration/trace-events b/migration/trace-events
index fa660e35b1..7372ce2a51 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
migration_throttle(void) ""
+ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
ram_postcopy_send_discard_bitmap(void) ""
ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"
@@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
# migration/postcopy-ram.c
postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
-postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
diff --git a/migration/vmstate.c b/migration/vmstate.c
index b4d8ae982a..78b3cd48e7 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field)
return size;
}
-static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
+static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque)
{
- void *base_addr = opaque + field->offset;
-
- if (field->flags & VMS_POINTER) {
- if (alloc && (field->flags & VMS_ALLOC)) {
- gsize size = 0;
- if (field->flags & VMS_VBUFFER) {
- size = vmstate_size(opaque, field);
- } else {
- int n_elems = vmstate_n_elems(opaque, field);
- if (n_elems) {
- size = n_elems * field->size;
- }
- }
- if (size) {
- *(void **)base_addr = g_malloc(size);
- }
+ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) {
+ gsize size = vmstate_size(opaque, field);
+ size *= vmstate_n_elems(opaque, field);
+ if (size) {
+ *(void **)ptr = g_malloc(size);
}
- base_addr = *(void **)base_addr;
}
-
- return base_addr;
}
int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
@@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
field->field_exists(opaque, version_id)) ||
(!field->field_exists &&
field->version_id <= version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field, true);
+ void *first_elem = opaque + field->offset;
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
+ vmstate_handle_alloc(first_elem, field, opaque);
+ if (field->flags & VMS_POINTER) {
+ first_elem = *(void **)first_elem;
+ assert(first_elem || !n_elems);
+ }
for (i = 0; i < n_elems; i++) {
- void *addr = base_addr + size * i;
+ void *curr_elem = first_elem + size * i;
if (field->flags & VMS_ARRAY_OF_POINTER) {
- addr = *(void **)addr;
+ curr_elem = *(void **)curr_elem;
}
- if (field->flags & VMS_STRUCT) {
- ret = vmstate_load_state(f, field->vmsd, addr,
+ if (!curr_elem) {
+ /* if null pointer check placeholder and do not follow */
+ assert(field->flags & VMS_ARRAY_OF_POINTER);
+ ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL);
+ } else if (field->flags & VMS_STRUCT) {
+ ret = vmstate_load_state(f, field->vmsd, curr_elem,
field->vmsd->version_id);
} else {
- ret = field->info->get(f, addr, size, field);
+ ret = field->info->get(f, curr_elem, size, field);
}
if (ret >= 0) {
ret = qemu_file_get_error(f);
@@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
while (field->name) {
if (!field->field_exists ||
field->field_exists(opaque, vmsd->version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field, false);
+ void *first_elem = opaque + field->offset;
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
int64_t old_offset, written_bytes;
QJSON *vmdesc_loop = vmdesc;
trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
+ if (field->flags & VMS_POINTER) {
+ first_elem = *(void **)first_elem;
+ assert(first_elem || !n_elems);
+ }
for (i = 0; i < n_elems; i++) {
- void *addr = base_addr + size * i;
+ void *curr_elem = first_elem + size * i;
vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems);
old_offset = qemu_ftell_fast(f);
-
if (field->flags & VMS_ARRAY_OF_POINTER) {
- addr = *(void **)addr;
+ assert(curr_elem);
+ curr_elem = *(void **)curr_elem;
}
- if (field->flags & VMS_STRUCT) {
- vmstate_save_state(f, field->vmsd, addr, vmdesc_loop);
+ if (!curr_elem) {
+ /* if null pointer write placeholder and do not follow */
+ assert(field->flags & VMS_ARRAY_OF_POINTER);
+ vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL);
+ } else if (field->flags & VMS_STRUCT) {
+ vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop);
} else {
- field->info->put(f, addr, size, field, vmdesc_loop);
+ field->info->put(f, curr_elem, size, field, vmdesc_loop);
}
written_bytes = qemu_ftell_fast(f) - old_offset;
@@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = {
.put = put_uint64,
};
+static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField *field)
+
+{
+ if (qemu_get_byte(f) == VMS_NULLPTR_MARKER) {
+ return 0;
+ }
+ error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER");
+ return -EINVAL;
+}
+
+static int put_nullptr(QEMUFile *f, void *pv, size_t size,
+ VMStateField *field, QJSON *vmdesc)
+
+{
+ if (pv == NULL) {
+ qemu_put_byte(f, VMS_NULLPTR_MARKER);
+ return 0;
+ }
+ error_report("vmstate: put_nullptr must be called with pv == NULL");
+ return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_nullptr = {
+ .name = "uint64",
+ .get = get_nullptr,
+ .put = put_nullptr,
+};
+
/* 64 bit unsigned int. See that the received value is the same than the one
in the field */
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 549f45f066..5f2fcdfc45 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -29,7 +29,6 @@
#include "qemu/error-report.h"
#include "qemu/help_option.h"
#include "sysemu/block-backend.h"
-#include "migration/migration.h"
/*
* Aliases were a bad idea from the start. Let's keep them
@@ -579,14 +578,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
return NULL;
}
- if (only_migratable) {
- if (dc->vmsd->unmigratable) {
- error_setg(errp, "Device %s is not migratable, but "
- "--only-migratable was specified", driver);
- return NULL;
- }
- }
-
/* find bus */
path = qemu_opt_get(opts, "bus");
if (path != NULL) {
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
index 14a27e7f6a..bcef7ee28e 100755
--- a/scripts/vmstate-static-checker.py
+++ b/scripts/vmstate-static-checker.py
@@ -85,6 +85,11 @@ def check_fields_match(name, s_field, d_field):
'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj',
'br.dev.exp.aer_log',
'parent_obj.parent_obj.exp.aer_log'],
+ 'spapr_pci': ['dma_liobn[0]', 'mig_liobn',
+ 'mem_win_addr', 'mig_mem_win_addr',
+ 'mem_win_size', 'mig_mem_win_size',
+ 'io_win_addr', 'mig_io_win_addr',
+ 'io_win_size', 'mig_io_win_size'],
}
if not name in changed_names:
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
index bbe158fe3b..6d52f29bb2 100644
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@@ -1,6 +1,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "migration/vmstate.h"
+#include "migration/migration.h"
const VMStateDescription vmstate_dummy = {};
@@ -19,3 +20,8 @@ void vmstate_unregister(DeviceState *dev,
void *opaque)
{
}
+
+int check_migratable(Object *obj, Error **err)
+{
+ return 0;
+}
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 39f338a4c4..f694a89782 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -476,6 +476,8 @@ const VMStateDescription vmsd_tst = {
}
};
+/* test array migration */
+
#define AR_SIZE 4
typedef struct {
@@ -492,20 +494,22 @@ const VMStateDescription vmsd_arps = {
VMSTATE_END_OF_LIST()
}
};
+
+static uint8_t wire_arr_ptr_no0[] = {
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x03,
+ QEMU_VM_EOF
+};
+
static void test_arr_ptr_str_no0_save(void)
{
TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} };
TestArrayOfPtrToStuct sample = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} };
- uint8_t wire_sample[] = {
- 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x02,
- 0x00, 0x00, 0x00, 0x03,
- QEMU_VM_EOF
- };
save_vmstate(&vmsd_arps, &sample);
- compare_vmstate(wire_sample, sizeof(wire_sample));
+ compare_vmstate(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0));
}
static void test_arr_ptr_str_no0_load(void)
@@ -514,21 +518,98 @@ static void test_arr_ptr_str_no0_load(void)
TestStructTriv ar[AR_SIZE] = {};
TestArrayOfPtrToStuct obj = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} };
int idx;
- uint8_t wire_sample[] = {
- 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x02,
- 0x00, 0x00, 0x00, 0x03,
- QEMU_VM_EOF
- };
- save_buffer(wire_sample, sizeof(wire_sample));
+ save_buffer(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0));
+ SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1,
+ wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)));
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ /* compare the target array ar with the ground truth array ar_gt */
+ g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i);
+ }
+}
+
+static uint8_t wire_arr_ptr_0[] = {
+ 0x00, 0x00, 0x00, 0x00,
+ VMS_NULLPTR_MARKER,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x03,
+ QEMU_VM_EOF
+};
+
+static void test_arr_ptr_str_0_save(void)
+{
+ TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} };
+ TestArrayOfPtrToStuct sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+
+ save_vmstate(&vmsd_arps, &sample);
+ compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+}
+
+static void test_arr_ptr_str_0_load(void)
+{
+ TestStructTriv ar_gt[AR_SIZE] = {{.i = 0}, {.i = 0}, {.i = 2}, {.i = 3} };
+ TestStructTriv ar[AR_SIZE] = {};
+ TestArrayOfPtrToStuct obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+ int idx;
+
+ save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1,
- wire_sample, sizeof(wire_sample)));
+ wire_arr_ptr_0, sizeof(wire_arr_ptr_0)));
for (idx = 0; idx < AR_SIZE; ++idx) {
/* compare the target array ar with the ground truth array ar_gt */
g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i);
}
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ if (idx == 1) {
+ g_assert_cmpint((uintptr_t)(obj.ar[idx]), ==, 0);
+ } else {
+ g_assert_cmpint((uintptr_t)(obj.ar[idx]), !=, 0);
+ }
+ }
+}
+
+typedef struct TestArrayOfPtrToInt {
+ int32_t *ar[AR_SIZE];
+} TestArrayOfPtrToInt;
+
+const VMStateDescription vmsd_arpp = {
+ .name = "test/arps",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_ARRAY_OF_POINTER(ar, TestArrayOfPtrToInt,
+ AR_SIZE, 0, vmstate_info_int32, int32_t*),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void test_arr_ptr_prim_0_save(void)
+{
+ int32_t ar[AR_SIZE] = {0 , 1, 2, 3};
+ TestArrayOfPtrToInt sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+
+ save_vmstate(&vmsd_arpp, &sample);
+ compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+}
+
+static void test_arr_ptr_prim_0_load(void)
+{
+ int32_t ar_gt[AR_SIZE] = {0, 1, 2, 3};
+ int32_t ar[AR_SIZE] = {3 , 42, 1, 0};
+ TestArrayOfPtrToInt obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+ int idx;
+
+ save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+ SUCCESS(load_vmstate_one(&vmsd_arpp, &obj, 1,
+ wire_arr_ptr_0, sizeof(wire_arr_ptr_0)));
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ /* compare the target array ar with the ground truth array ar_gt */
+ if (idx == 1) {
+ g_assert_cmpint(42, ==, ar[idx]);
+ } else {
+ g_assert_cmpint(ar_gt[idx], ==, ar[idx]);
+ }
+ }
}
/* test QTAILQ migration */
@@ -781,6 +862,13 @@ int main(int argc, char **argv)
test_arr_ptr_str_no0_save);
g_test_add_func("/vmstate/array/ptr/str/no0/load",
test_arr_ptr_str_no0_load);
+ g_test_add_func("/vmstate/array/ptr/str/0/save", test_arr_ptr_str_0_save);
+ g_test_add_func("/vmstate/array/ptr/str/0/load",
+ test_arr_ptr_str_0_load);
+ g_test_add_func("/vmstate/array/ptr/prim/0/save",
+ test_arr_ptr_prim_0_save);
+ g_test_add_func("/vmstate/array/ptr/prim/0/load",
+ test_arr_ptr_prim_0_load);
g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);