diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-03-02 17:39:12 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-03-02 17:39:12 +0000 |
commit | 251501a3714096f807778f6d3f03711dcdb9ce29 (patch) | |
tree | fd80eb80a4ea2eb24f07411aa8eb8b294aec4213 | |
parent | c9fc677a35e6e00cdf00c7d085cbd74e0b90b2e6 (diff) | |
parent | 665414ad06aa1bc92e615db9641e58fb13d07de1 (diff) | |
download | qemu-251501a3714096f807778f6d3f03711dcdb9ce29.zip |
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170228a' into staging
Migration pull
Note: The 'postcopy: Update userfaultfd.h header' is part of
Paolo's header update and will disappear if applied after it.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
# gpg: Signature made Tue 28 Feb 2017 12:38:34 GMT
# gpg: using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7
* remotes/dgilbert/tags/pull-migration-20170228a: (27 commits)
postcopy: Add extra check for COPY function
postcopy: Add doc about hugepages and postcopy
postcopy: Check for userfault+hugepage feature
postcopy: Update userfaultfd.h header
postcopy: Allow hugepages
postcopy: Send whole huge pages
postcopy: Mask fault addresses to huge page boundary
postcopy: Load huge pages in one go
postcopy: Use temporary for placing zero huge pages
postcopy: Plumb pagesize down into place helpers
postcopy: Record largest page size
postcopy: enhance ram_block_discard_range for hugepages
exec: ram_block_discard_range
postcopy: Chunk discards for hugepages
postcopy: Transmit and compare individual page sizes
postcopy: Transmit ram size summary word
migration: fix use-after-free of to_dst_file
migration: Update docs to discourage version bumps
migration: fix id leak regression
migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | docs/migration.txt | 71 | ||||
-rw-r--r-- | exec.c | 83 | ||||
-rw-r--r-- | hw/core/qdev.c | 7 | ||||
-rw-r--r-- | hw/usb/bus.c | 19 | ||||
-rw-r--r-- | include/exec/cpu-common.h | 2 | ||||
-rw-r--r-- | include/migration/migration.h | 6 | ||||
-rw-r--r-- | include/migration/postcopy-ram.h | 13 | ||||
-rw-r--r-- | include/migration/vmstate.h | 4 | ||||
-rw-r--r-- | migration/migration.c | 36 | ||||
-rw-r--r-- | migration/postcopy-ram.c | 142 | ||||
-rw-r--r-- | migration/ram.c | 109 | ||||
-rw-r--r-- | migration/savevm.c | 38 | ||||
-rw-r--r-- | migration/trace-events | 2 | ||||
-rw-r--r-- | migration/vmstate.c | 97 | ||||
-rw-r--r-- | qdev-monitor.c | 9 | ||||
-rwxr-xr-x | scripts/vmstate-static-checker.py | 5 | ||||
-rw-r--r-- | stubs/vmstate.c | 6 | ||||
-rw-r--r-- | tests/test-vmstate.c | 122 |
18 files changed, 550 insertions, 221 deletions
diff --git a/docs/migration.txt b/docs/migration.txt index 6503c17685..1b940a829b 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -161,6 +161,11 @@ include/hw/hw.h. === More about versions === +Version numbers are intended for major incompatible changes to the +migration of a device, and using them breaks backwards-migration +compatibility; in general most changes can be made by adding Subsections +(see below) or _TEST macros (see below) which won't break compatibility. + You can see that there are several version fields: - version_id: the maximum version_id supported by VMState for that device. @@ -175,6 +180,9 @@ version_id. And the function load_state_old() (if present) is able to load state from minimum_version_id_old to minimum_version_id. This function is deprecated and will be removed when no more users are left. +Saving state will always create a section with the 'version_id' value +and thus can't be loaded by any older QEMU. + === Massaging functions === Sometimes, it is not enough to be able to save the state directly @@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation not enabled, the values on that fields are garbage and don't need to be sent. +Using a condition function that checks a 'property' to determine whether +to send a subsection allows backwards migration compatibility when +new subsections are added. + +For example; + a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and + default it to true. + b) Add an entry to the HW_COMPAT_ for the previous version + that sets the property to false. + c) Add a static bool support_foo function that tests the property. + d) Add a subsection with a .needed set to the support_foo function + e) (potentially) Add a pre_load that sets up a default value for 'foo' + to be used if the subsection isn't loaded. + +Now that subsection will not be generated when using an older +machine type and the migration stream will be accepted by older +QEMU versions. 
pre-load functions can be used to initialise state +on the newer version so that they default to suitable values +when loading streams created by older QEMU versions that do not +generate the subsection. + +In some cases subsections are added for data that had been accidentally +omitted by earlier versions; if the missing data causes the migration +process to succeed but the guest to behave badly then it may be better +to send the subsection and cause the migration to explicitly fail +with the unknown subsection error. If the bad behaviour only happens +with certain data values, making the subsection conditional on +the data value (rather than the machine type) allows migrations to succeed +in most cases. In general the preference is to tie the subsection to +the machine type, and allow reliable migrations, unless the behaviour +from omission of the subsection is really bad. + += Not sending existing elements = + +Sometimes members of the VMState are no longer needed; + removing them will break migration compatibility, and + making them version dependent and bumping the version will break backwards + migration compatibility. + +The best way is to: + a) Add a new property/compatibility/function in the same way for subsections + above. + b) replace the VMSTATE macro with the _TEST version of the macro, e.g.: + VMSTATE_UINT32(foo, barstruct) + becomes + VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz) + + Sometime in the future when we no longer care about the ancient +versions these can be killed off. + = Return path = In most migration scenarios there is only a single data path that runs @@ -482,3 +540,16 @@ request for a page that has already been sent is ignored. Duplicate requests such as this can happen as a page is sent at about the same time the destination accesses it. +=== Postcopy with hugepages === + +Postcopy now works with hugetlbfs backed memory: + a) The Linux kernel on the destination must support userfault on hugepages.
+ b) The huge-page configuration on the source and destination VMs must be + identical; i.e. RAMBlocks on both sides must use the same page size. + c) Note that -mem-path /dev/hugepages will fall back to allocating normal + RAM if it doesn't have enough hugepages, triggering (b) to fail. + Using -mem-prealloc enforces the allocation using hugepages. + d) Care should be taken with the size of hugepage used; postcopy with 2MB + hugepages works well, however 1GB hugepages are likely to be problematic + since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, + and until the full page is transferred the destination thread is blocked. @@ -45,6 +45,12 @@ #include "exec/address-spaces.h" #include "sysemu/xen-mapcache.h" #include "trace-root.h" + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE +#include <fcntl.h> +#include <linux/falloc.h> +#endif + #endif #include "exec/cpu-all.h" #include "qemu/rcu_queue.h" @@ -1518,6 +1524,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb) return rb->page_size; } +/* Returns the largest size of page in use */ +size_t qemu_ram_pagesize_largest(void) +{ + RAMBlock *block; + size_t largest = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + largest = MAX(largest, qemu_ram_pagesize(block)); + } + + return largest; +} + static int memory_try_enable_merging(void *addr, size_t len) { if (!machine_mem_merge(current_machine)) { @@ -3294,4 +3313,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) rcu_read_unlock(); return ret; } + +/* + * Unmap pages of memory from start to start+length such that + * they a) read as 0, b) Trigger whatever fault mechanism + * the OS provides for postcopy. + * The pages must be unmapped by the end of the function. 
+ * Returns: 0 on success, non-0 on failure + * + */ +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) +{ + int ret = -1; + + uint8_t *host_startaddr = rb->host + start; + + if ((uintptr_t)host_startaddr & (rb->page_size - 1)) { + error_report("ram_block_discard_range: Unaligned start address: %p", + host_startaddr); + goto err; + } + + if ((start + length) <= rb->used_length) { + uint8_t *host_endaddr = host_startaddr + length; + if ((uintptr_t)host_endaddr & (rb->page_size - 1)) { + error_report("ram_block_discard_range: Unaligned end address: %p", + host_endaddr); + goto err; + } + + errno = ENOTSUP; /* If we are missing MADVISE etc */ + + if (rb->page_size == qemu_host_page_size) { +#if defined(CONFIG_MADVISE) + /* Note: We need the madvise MADV_DONTNEED behaviour of definitely + * freeing the page. + */ + ret = madvise(host_startaddr, length, MADV_DONTNEED); +#endif + } else { + /* Huge page case - unfortunately it can't do DONTNEED, but + * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the + * huge page file.
+ */ +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, length); +#endif + } + if (ret) { + ret = -errno; + error_report("ram_block_discard_range: Failed to discard range " + "%s:%" PRIx64 " +%zx (%d)", + rb->idstr, start, length, ret); + } + } else { + error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64 + "/%zx/" RAM_ADDR_FMT")", + rb->idstr, start, length, rb->used_length); + } + +err: + return ret; +} + #endif diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 923e626333..1e7fb33246 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -37,6 +37,7 @@ #include "hw/boards.h" #include "hw/sysbus.h" #include "qapi-event.h" +#include "migration/migration.h" int qdev_hotplug = 0; static bool qdev_hot_added = false; @@ -903,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp) Error *local_err = NULL; bool unattached_parent = false; static int unattached_count; + int ret; if (dev->hotplugged && !dc->hotpluggable) { error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj)); @@ -910,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp) } if (value && !dev->realized) { + ret = check_migratable(obj, &local_err); + if (ret < 0) { + goto fail; + } + if (!obj->parent) { gchar *name = g_strdup_printf("device[%d]", unattached_count++); diff --git a/hw/usb/bus.c b/hw/usb/bus.c index efe4b8e1a6..24f1608b4b 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -8,7 +8,6 @@ #include "monitor/monitor.h" #include "trace.h" #include "qemu/cutils.h" -#include "migration/migration.h" static void usb_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent); @@ -688,8 +687,6 @@ USBDevice *usbdevice_create(const char *cmdline) const char *params; int len; USBDevice *dev; - ObjectClass *klass; - DeviceClass *dc; params = strchr(cmdline,':'); if (params) { @@ -724,22 +721,6 @@ USBDevice *usbdevice_create(const char *cmdline) return NULL; } - klass = 
object_class_by_name(f->name); - if (klass == NULL) { - error_report("Device '%s' not found", f->name); - return NULL; - } - - dc = DEVICE_CLASS(klass); - - if (only_migratable) { - if (dc->vmsd->unmigratable) { - error_report("Device %s is not migratable, but --only-migratable " - "was specified", f->name); - return NULL; - } - } - if (f->usbdevice_init) { dev = f->usbdevice_init(bus, params); } else { diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index bd15853e51..8c305aa4fa 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -64,6 +64,7 @@ void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev); void qemu_ram_unset_idstr(RAMBlock *block); const char *qemu_ram_get_idstr(RAMBlock *rb); size_t qemu_ram_pagesize(RAMBlock *block); +size_t qemu_ram_pagesize_largest(void); void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, int len, int is_write); @@ -105,6 +106,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr, ram_addr_t offset, ram_addr_t length, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); #endif diff --git a/include/migration/migration.h b/include/migration/migration.h index 1735d66512..5720c884f4 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -22,6 +22,7 @@ #include "qapi-types.h" #include "exec/cpu-common.h" #include "qemu/coroutine_int.h" +#include "qom/object.h" #define QEMU_VM_FILE_MAGIC 0x5145564d #define QEMU_VM_FILE_VERSION_COMPAT 0x00000002 @@ -92,6 +93,7 @@ struct MigrationIncomingState { */ QemuEvent main_thread_load_event; + size_t largest_page_size; bool have_fault_thread; QemuThread fault_thread; QemuSemaphore fault_thread_sem; @@ -107,6 +109,7 @@ struct MigrationIncomingState { QEMUFile *to_src_file; QemuMutex rp_mutex; /* We send replies from multiple threads */ void *postcopy_tmp_page; + void 
*postcopy_tmp_zero_page; QEMUBH *bh; @@ -313,6 +316,8 @@ int migrate_add_blocker(Error *reason, Error **errp); */ void migrate_del_blocker(Error *reason); +int check_migratable(Object *obj, Error **err); + bool migrate_release_ram(void); bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); @@ -375,6 +380,7 @@ void global_state_store_running(void); void flush_page_queue(MigrationState *ms); int ram_save_queue_pages(MigrationState *ms, const char *rbname, ram_addr_t start, ram_addr_t len); +uint64_t ram_pagesize_summary(void); PostcopyState postcopy_state_get(void); /* Set the state and return the old state */ diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index b6a7491f2d..8e036b95a2 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -35,13 +35,6 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis); /* - * Discard the contents of 'length' bytes from 'start' - * We can assume that if we've been called postcopy_ram_hosttest returned true - */ -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length); - -/* * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard * however leaving it until after precopy means that most of the precopy * data is still THPd @@ -81,13 +74,15 @@ void postcopy_discard_send_finish(MigrationState *ms, * to use other postcopy_ routines to allocate. 
* returns 0 on success */ -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from); +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize); /* * Place a zero page at (host) atomically * returns 0 on success */ -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host); +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize); /* * Allocate a page of memory that can be mapped at a later point in time diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 63e7b02e05..f2dbf8410a 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -253,6 +253,10 @@ extern const VMStateInfo vmstate_info_uint16; extern const VMStateInfo vmstate_info_uint32; extern const VMStateInfo vmstate_info_uint64; +/** Put this in the stream when migrating a null pointer.*/ +#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ +extern const VMStateInfo vmstate_info_nullptr; + extern const VMStateInfo vmstate_info_float64; extern const VMStateInfo vmstate_info_cpudouble; diff --git a/migration/migration.c b/migration/migration.c index c6ae69d371..3dab6845b1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -49,6 +49,10 @@ * for sending the last part */ #define DEFAULT_MIGRATE_SET_DOWNTIME 300 +/* Maximum migrate downtime set to 2000 seconds */ +#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 +#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) + /* Default compression thread count */ #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 /* Default decompression thread count, usually decompression is at @@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque) int ret; mis->from_src_file = f; + mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_ACTIVE); @@ -843,10 +848,11 @@ void 
qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) return; } if (params->has_downtime_limit && - (params->downtime_limit < 0 || params->downtime_limit > 2000000)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "downtime_limit", - "an integer in the range of 0 to 2000000 milliseconds"); + (params->downtime_limit < 0 || + params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { + error_setg(errp, "Parameter 'downtime_limit' expects an integer in " + "the range of 0 to %d milliseconds", + MAX_MIGRATE_DOWNTIME); return; } if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) { @@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason) migration_blockers = g_slist_remove(migration_blockers, reason); } +int check_migratable(Object *obj, Error **err) +{ + DeviceClass *dc = DEVICE_GET_CLASS(obj); + if (only_migratable && dc->vmsd) { + if (dc->vmsd->unmigratable) { + error_setg(err, "Device %s is not migratable, but " + "--only-migratable was specified", + object_get_typename(obj)); + return -1; + } + } + + return 0; +} + void qmp_migrate_incoming(const char *uri, Error **errp) { Error *local_err = NULL; @@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp) void qmp_migrate_set_downtime(double value, Error **errp) { + if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { + error_setg(errp, "Parameter 'downtime_limit' expects an integer in " + "the range of 0 to %d seconds", + MAX_MIGRATE_DOWNTIME_SECONDS); + return; + } + value *= 1000; /* Convert to milliseconds */ value = MAX(0, MIN(INT64_MAX, value)); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a40dddbaf6..effbeb64fb 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd) return false; } - return true; -} - -/* - * Check for things that postcopy won't support; returns 0 if the block - * is fine. 
- */ -static int check_range(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque) -{ - RAMBlock *rb = qemu_ram_block_by_name(block_name); - - if (qemu_ram_pagesize(rb) > getpagesize()) { - error_report("Postcopy doesn't support large page sizes yet (%s)", - block_name); - return -E2BIG; + if (getpagesize() != ram_pagesize_summary()) { + bool have_hp = false; + /* We've got a huge page */ +#ifdef UFFD_FEATURE_MISSING_HUGETLBFS + have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS; +#endif + if (!have_hp) { + error_report("Userfault on this host does not support huge pages"); + return false; + } } - - return 0; + return true; } /* @@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void) goto out; } - /* Check for anything about the RAMBlocks we don't support */ - if (qemu_ram_foreach_block(check_range, NULL)) { - /* check_range will have printed its own error */ - goto out; - } - ufd = syscall(__NR_userfaultfd, O_CLOEXEC); if (ufd == -1) { error_report("%s: userfaultfd not available: %s", __func__, @@ -200,27 +187,6 @@ out: return ret; } -/** - * postcopy_ram_discard_range: Discard a range of memory. - * We can assume that if we've been called postcopy_ram_hosttest returned true. - * - * @mis: Current incoming migration state. - * @start, @length: range of memory to discard. - * - * returns: 0 on success. - */ -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length) -{ - trace_postcopy_ram_discard_range(start, length); - if (madvise(start, length, MADV_DONTNEED)) { - error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno)); - return -1; - } - - return 0; -} - /* * Setup an area of RAM so that it *can* be used for postcopy later; this * must be done right at the start prior to pre-copy. @@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr, * - we're going to get the copy from the source anyway. 
* (Precopy will just overwrite this data, so doesn't need the discard) */ - if (postcopy_ram_discard_range(mis, host_addr, length)) { + if (ram_discard_range(mis, block_name, 0, length)) { return -1; } @@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); if (mis->postcopy_tmp_page) { - munmap(mis->postcopy_tmp_page, getpagesize()); + munmap(mis->postcopy_tmp_page, mis->largest_page_size); mis->postcopy_tmp_page = NULL; } + if (mis->postcopy_tmp_zero_page) { + munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size); + mis->postcopy_tmp_zero_page = NULL; + } trace_postcopy_ram_incoming_cleanup_exit(); return 0; } @@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, error_report("%s userfault register: %s", __func__, strerror(errno)); return -1; } + if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { + error_report("%s userfault: Region doesn't support COPY", __func__); + return -1; + } return 0; } @@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque) MigrationIncomingState *mis = opaque; struct uffd_msg msg; int ret; - size_t hostpagesize = getpagesize(); RAMBlock *rb = NULL; RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ @@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque) break; } - rb_offset &= ~(hostpagesize - 1); + rb_offset &= ~(qemu_ram_pagesize(rb) - 1); trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, qemu_ram_get_idstr(rb), rb_offset); @@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque) if (rb != last_rb) { last_rb = rb; migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), - rb_offset, hostpagesize); + rb_offset, qemu_ram_pagesize(rb)); } else { /* Save some space */ migrate_send_rp_req_pages(mis, NULL, - rb_offset, hostpagesize); + rb_offset, qemu_ram_pagesize(rb)); } } 
trace_postcopy_ram_fault_thread_exit(); @@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Place a host page (from) at (host) atomically * returns 0 on success */ -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; - copy_struct.len = getpagesize(); + copy_struct.len = pagesize; copy_struct.mode = 0; /* copy also acks to the kernel waking the stalled thread up @@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) */ if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) { int e = errno; - error_report("%s: %s copy host: %p from: %p", - __func__, strerror(e), host, from); + error_report("%s: %s copy host: %p from: %p (size: %zd)", + __func__, strerror(e), host, from, pagesize); return -e; } @@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) * Place a zero page at (host) atomically * returns 0 on success */ -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize) { - struct uffdio_zeropage zero_struct; + trace_postcopy_place_page_zero(host); - zero_struct.range.start = (uint64_t)(uintptr_t)host; - zero_struct.range.len = getpagesize(); - zero_struct.mode = 0; + if (pagesize == getpagesize()) { + struct uffdio_zeropage zero_struct; + zero_struct.range.start = (uint64_t)(uintptr_t)host; + zero_struct.range.len = getpagesize(); + zero_struct.mode = 0; - if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { - int e = errno; - error_report("%s: %s zero host: %p", - __func__, strerror(e), host); + if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { + int e = errno; + error_report("%s: %s
zero host: %p", + __func__, strerror(e), host); - return -e; + return -e; + } + } else { + /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */ + if (!mis->postcopy_tmp_zero_page) { + mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (mis->postcopy_tmp_zero_page == MAP_FAILED) { + int e = errno; + mis->postcopy_tmp_zero_page = NULL; + error_report("%s: %s mapping large zero page", + __func__, strerror(e)); + return -e; + } + memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); + } + return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, + pagesize); } - trace_postcopy_place_page_zero(host); return 0; } @@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) void *postcopy_get_tmp_page(MigrationIncomingState *mis) { if (!mis->postcopy_tmp_page) { - mis->postcopy_tmp_page = mmap(NULL, getpagesize(), + mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mis->postcopy_tmp_page == MAP_FAILED) { @@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) return -1; } -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length) -{ - assert(0); - return -1; -} - int postcopy_ram_prepare_discard(MigrationIncomingState *mis) { assert(0); @@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return -1; } -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize) { assert(0); return -1; } -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize) { assert(0); return -1; diff --git a/migration/ram.c b/migration/ram.c index f289fcddd5..719425b9b8 100644 --- 
a/migration/ram.c +++ b/migration/ram.c @@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void) iterations_prev = 0; } +/* Returns a summary bitmap of the page sizes of all RAMBlocks; + * for VMs with just normal pages this is equivalent to the + * host page size. If it's got some huge pages then it's the OR + * of all the different page sizes. + */ +uint64_t ram_pagesize_summary(void) +{ + RAMBlock *block; + uint64_t summary = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + summary |= block->page_size; + } + + return summary; +} + static void migration_bitmap_sync(void) { RAMBlock *block; @@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f, * offset to point into the middle of a host page * in which case the remainder of the hostpage is sent. * Only dirty target pages are sent. + * Note that the host page size may be a huge page for this + * block. * * Returns: Number of pages written. * @@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f, ram_addr_t dirty_ram_abs) { int tmppages, pages = 0; + size_t pagesize = qemu_ram_pagesize(pss->block); + do { tmppages = ram_save_target_page(ms, f, pss, last_stage, bytes_transferred, dirty_ram_abs); @@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f, pages += tmppages; pss->offset += TARGET_PAGE_SIZE; dirty_ram_abs += TARGET_PAGE_SIZE; - } while (pss->offset & (qemu_host_page_size - 1)); + } while (pss->offset & (pagesize - 1)); /* The offset we leave with is the last one we looked at */ pss->offset -= TARGET_PAGE_SIZE; @@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, { unsigned long *bitmap; unsigned long *unsentmap; - unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE; + unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; unsigned long first = block->offset >> TARGET_PAGE_BITS; unsigned long len = block->used_length >> 
TARGET_PAGE_BITS; unsigned long last = first + (len - 1); unsigned long run_start; + if (block->page_size == TARGET_PAGE_SIZE) { + /* Easy case - TPS==HPS for a non-huge page RAMBlock */ + return; + } + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; @@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * Utility for the outgoing postcopy code. * * Discard any partially sent host-page size chunks, mark any partially - * dirty host-page size chunks as all dirty. + * dirty host-page size chunks as all dirty. In this case the host-page + * is the host-page for the particular RAMBlock, i.e. it might be a huge page * * Returns: 0 on success */ @@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms) { struct RAMBlock *block; - if (qemu_host_page_size == TARGET_PAGE_SIZE) { - /* Easy case - TPS==HPS - nothing to be done */ - return 0; - } - /* Easiest way to make sure we don't resume in the middle of a host-page */ last_seen_block = NULL; last_sent_block = NULL; @@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) return -EINVAL; } - /* Deal with TPS != HPS */ + /* Deal with TPS != HPS and huge pages */ ret = postcopy_chunk_hostpages(ms); if (ret) { rcu_read_unlock(); @@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis, { int ret = -1; + trace_ram_discard_range(block_name, start, length); + rcu_read_lock(); RAMBlock *rb = qemu_ram_block_by_name(block_name); @@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis, goto err; } - uint8_t *host_startaddr = rb->host + start; - - if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) { - error_report("ram_discard_range: Unaligned start address: %p", - host_startaddr); - goto err; - } - - if ((start + length) <= rb->used_length) { - uint8_t *host_endaddr = host_startaddr + length; - if ((uintptr_t)host_endaddr & 
(qemu_host_page_size - 1)) { - error_report("ram_discard_range: Unaligned end address: %p", - host_endaddr); - goto err; - } - ret = postcopy_ram_discard_range(mis, host_startaddr, length); - } else { - error_report("ram_discard_range: Overrun block '%s' (%" PRIu64 - "/%zx/" RAM_ADDR_FMT")", - block_name, start, length, rb->used_length); - } + ret = ram_block_discard_range(rb, start, length); err: rcu_read_unlock(); @@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); + if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { + qemu_put_be64(f, block->page_size); + } } rcu_read_unlock(); @@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f) { int flags = 0, ret = 0; bool place_needed = false; - bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE; + bool matching_page_sizes = false; MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ void *postcopy_host_page = postcopy_get_tmp_page(mis); @@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f) void *host = NULL; void *page_buffer = NULL; void *place_source = NULL; + RAMBlock *block = NULL; uint8_t ch; addr = qemu_get_be64(f); @@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f) trace_ram_load_postcopy_loop((uint64_t)addr, flags); place_needed = false; if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) { - RAMBlock *block = ram_block_from_stream(f, flags); + block = ram_block_from_stream(f, flags); host = host_from_ram_block_offset(block, addr); if (!host) { @@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f) ret = -EINVAL; break; } + matching_page_sizes = block->page_size == TARGET_PAGE_SIZE; /* - * Postcopy requires that we place whole host pages atomically. 
+ * Postcopy requires that we place whole host pages atomically; + * these may be huge pages for RAMBlocks that are backed by + * hugetlbfs. * To make it atomic, the data is read into a temporary page * that's moved into place later. * The migration protocol uses, possibly smaller, target-pages @@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f) * of a host page in order. */ page_buffer = postcopy_host_page + - ((uintptr_t)host & ~qemu_host_page_mask); + ((uintptr_t)host & (block->page_size - 1)); /* If all TP are zero then we can optimise the place */ - if (!((uintptr_t)host & ~qemu_host_page_mask)) { + if (!((uintptr_t)host & (block->page_size - 1))) { all_zero = true; } else { /* not the 1st TP within the HP */ @@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f) * page */ place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & - ~qemu_host_page_mask) == 0; + (block->page_size - 1)) == 0; place_source = postcopy_host_page; } last_host = host; @@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f) if (place_needed) { /* This gets called at the last target page in the host page */ + void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; + if (all_zero) { - ret = postcopy_place_page_zero(mis, - host + TARGET_PAGE_SIZE - - qemu_host_page_size); + ret = postcopy_place_page_zero(mis, place_dest, + block->page_size); } else { - ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE - - qemu_host_page_size, - place_source); + ret = postcopy_place_page(mis, place_dest, + place_source, block->page_size); } } if (!ret) { @@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) * be atomic */ bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING; + /* ADVISE is earlier, it shows the source has the postcopy capability on */ + bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE; seq_iter++; @@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id) error_report_err(local_err); } } + /* For postcopy we need to check hugepage sizes match */ + if (postcopy_advised && + block->page_size != qemu_host_page_size) { + uint64_t remote_page_size = qemu_get_be64(f); + if (remote_page_size != block->page_size) { + error_report("Mismatched RAM page size %s " + "(local) %zd != %" PRId64, + id, block->page_size, + remote_page_size); + ret = -EINVAL; + } + } ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, block->idstr); } else { diff --git a/migration/savevm.c b/migration/savevm.c index 5ecd264134..3b19a4a274 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, return -1; } + g_free(id); se->compat = g_new0(CompatEntry, 1); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); @@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len) void qemu_savevm_send_postcopy_advise(QEMUFile *f) { uint64_t tmp[2]; - tmp[0] = cpu_to_be64(getpagesize()); + tmp[0] = cpu_to_be64(ram_pagesize_summary()); tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits()); trace_qemu_savevm_send_postcopy_advise(); @@ -1276,6 +1277,11 @@ done: status = MIGRATION_STATUS_COMPLETED; } migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status); + + /* f is outer parameter, it should not stay in global migration state after + * this function finished */ + ms->to_dst_file = NULL; + return ret; } @@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); - uint64_t remote_hps, remote_tps; + uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; trace_loadvm_postcopy_handle_advise(); if (ps != POSTCOPY_INCOMING_NONE) { @@ -1359,17 +1365,27 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return -1; 
} - remote_hps = qemu_get_be64(mis->from_src_file); - if (remote_hps != getpagesize()) { + remote_pagesize_summary = qemu_get_be64(mis->from_src_file); + local_pagesize_summary = ram_pagesize_summary(); + + if (remote_pagesize_summary != local_pagesize_summary) { /* - * Some combinations of mismatch are probably possible but it gets - * a bit more complicated. In particular we need to place whole - * host pages on the dest at once, and we need to ensure that we - * handle dirtying to make sure we never end up sending part of - * a hostpage on it's own. + * This detects two potential causes of mismatch: + * a) A mismatch in host page sizes + * Some combinations of mismatch are probably possible but it gets + * a bit more complicated. In particular we need to place whole + * host pages on the dest at once, and we need to ensure that we + * handle dirtying to make sure we never end up sending part of + * a hostpage on its own. + * b) The use of different huge page sizes on source/destination + * a more fine-grained test is performed during RAM block migration + * but this test here causes a nice early clear failure, and + * also fails when passed to an older qemu that doesn't + * do huge pages. 
*/ - error_report("Postcopy needs matching host page sizes (s=%d d=%d)", - (int)remote_hps, getpagesize()); + error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64 + " d=%" PRIx64 ")", + remote_pagesize_summary, local_pagesize_summary); return -1; } diff --git a/migration/trace-events b/migration/trace-events index fa660e35b1..7372ce2a51 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_throttle(void) "" +ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" ram_postcopy_send_discard_bitmap(void) "" ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx" @@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" # migration/postcopy-ram.c postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx" -postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx" postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" diff --git a/migration/vmstate.c b/migration/vmstate.c index b4d8ae982a..78b3cd48e7 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field) return size; } -static void *vmstate_base_addr(void *opaque, 
VMStateField *field, bool alloc) +static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) { - void *base_addr = opaque + field->offset; - - if (field->flags & VMS_POINTER) { - if (alloc && (field->flags & VMS_ALLOC)) { - gsize size = 0; - if (field->flags & VMS_VBUFFER) { - size = vmstate_size(opaque, field); - } else { - int n_elems = vmstate_n_elems(opaque, field); - if (n_elems) { - size = n_elems * field->size; - } - } - if (size) { - *(void **)base_addr = g_malloc(size); - } + if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { + gsize size = vmstate_size(opaque, field); + size *= vmstate_n_elems(opaque, field); + if (size) { + *(void **)ptr = g_malloc(size); } - base_addr = *(void **)base_addr; } - - return base_addr; } int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, @@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, field->field_exists(opaque, version_id)) || (!field->field_exists && field->version_id <= version_id)) { - void *base_addr = vmstate_base_addr(opaque, field, true); + void *first_elem = opaque + field->offset; int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); + vmstate_handle_alloc(first_elem, field, opaque); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems); + } for (i = 0; i < n_elems; i++) { - void *addr = base_addr + size * i; + void *curr_elem = first_elem + size * i; if (field->flags & VMS_ARRAY_OF_POINTER) { - addr = *(void **)addr; + curr_elem = *(void **)curr_elem; } - if (field->flags & VMS_STRUCT) { - ret = vmstate_load_state(f, field->vmsd, addr, + if (!curr_elem) { + /* if null pointer check placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL); + } else if (field->flags & VMS_STRUCT) { + ret = vmstate_load_state(f, field->vmsd, curr_elem, field->vmsd->version_id); } 
else { - ret = field->info->get(f, addr, size, field); + ret = field->info->get(f, curr_elem, size, field); } if (ret >= 0) { ret = qemu_file_get_error(f); @@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, while (field->name) { if (!field->field_exists || field->field_exists(opaque, vmsd->version_id)) { - void *base_addr = vmstate_base_addr(opaque, field, false); + void *first_elem = opaque + field->offset; int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); int64_t old_offset, written_bytes; QJSON *vmdesc_loop = vmdesc; trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems); + } for (i = 0; i < n_elems; i++) { - void *addr = base_addr + size * i; + void *curr_elem = first_elem + size * i; vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems); old_offset = qemu_ftell_fast(f); - if (field->flags & VMS_ARRAY_OF_POINTER) { - addr = *(void **)addr; + assert(curr_elem); + curr_elem = *(void **)curr_elem; } - if (field->flags & VMS_STRUCT) { - vmstate_save_state(f, field->vmsd, addr, vmdesc_loop); + if (!curr_elem) { + /* if null pointer write placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL); + } else if (field->flags & VMS_STRUCT) { + vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop); } else { - field->info->put(f, addr, size, field, vmdesc_loop); + field->info->put(f, curr_elem, size, field, vmdesc_loop); } written_bytes = qemu_ftell_fast(f) - old_offset; @@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = { .put = put_uint64, }; +static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField *field) + +{ + if (qemu_get_byte(f) == VMS_NULLPTR_MARKER) { + return 0; + } + error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); + return -EINVAL; +} + 
+static int put_nullptr(QEMUFile *f, void *pv, size_t size, + VMStateField *field, QJSON *vmdesc) + +{ + if (pv == NULL) { + qemu_put_byte(f, VMS_NULLPTR_MARKER); + return 0; + } + error_report("vmstate: put_nullptr must be called with pv == NULL"); + return -EINVAL; +} + +const VMStateInfo vmstate_info_nullptr = { + .name = "uint64", + .get = get_nullptr, + .put = put_nullptr, +}; + /* 64 bit unsigned int. See that the received value is the same than the one in the field */ diff --git a/qdev-monitor.c b/qdev-monitor.c index 549f45f066..5f2fcdfc45 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -29,7 +29,6 @@ #include "qemu/error-report.h" #include "qemu/help_option.h" #include "sysemu/block-backend.h" -#include "migration/migration.h" /* * Aliases were a bad idea from the start. Let's keep them @@ -579,14 +578,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) return NULL; } - if (only_migratable) { - if (dc->vmsd->unmigratable) { - error_setg(errp, "Device %s is not migratable, but " - "--only-migratable was specified", driver); - return NULL; - } - } - /* find bus */ path = qemu_opt_get(opts, "bus"); if (path != NULL) { diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py index 14a27e7f6a..bcef7ee28e 100755 --- a/scripts/vmstate-static-checker.py +++ b/scripts/vmstate-static-checker.py @@ -85,6 +85,11 @@ def check_fields_match(name, s_field, d_field): 'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj', 'br.dev.exp.aer_log', 'parent_obj.parent_obj.exp.aer_log'], + 'spapr_pci': ['dma_liobn[0]', 'mig_liobn', + 'mem_win_addr', 'mig_mem_win_addr', + 'mem_win_size', 'mig_mem_win_size', + 'io_win_addr', 'mig_io_win_addr', + 'io_win_size', 'mig_io_win_size'], } if not name in changed_names: diff --git a/stubs/vmstate.c b/stubs/vmstate.c index bbe158fe3b..6d52f29bb2 100644 --- a/stubs/vmstate.c +++ b/stubs/vmstate.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include 
"migration/vmstate.h" +#include "migration/migration.h" const VMStateDescription vmstate_dummy = {}; @@ -19,3 +20,8 @@ void vmstate_unregister(DeviceState *dev, void *opaque) { } + +int check_migratable(Object *obj, Error **err) +{ + return 0; +} diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c index 39f338a4c4..f694a89782 100644 --- a/tests/test-vmstate.c +++ b/tests/test-vmstate.c @@ -476,6 +476,8 @@ const VMStateDescription vmsd_tst = { } }; +/* test array migration */ + #define AR_SIZE 4 typedef struct { @@ -492,20 +494,22 @@ const VMStateDescription vmsd_arps = { VMSTATE_END_OF_LIST() } }; + +static uint8_t wire_arr_ptr_no0[] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x03, + QEMU_VM_EOF +}; + static void test_arr_ptr_str_no0_save(void) { TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} }; TestArrayOfPtrToStuct sample = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} }; - uint8_t wire_sample[] = { - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x02, - 0x00, 0x00, 0x00, 0x03, - QEMU_VM_EOF - }; save_vmstate(&vmsd_arps, &sample); - compare_vmstate(wire_sample, sizeof(wire_sample)); + compare_vmstate(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)); } static void test_arr_ptr_str_no0_load(void) @@ -514,21 +518,98 @@ static void test_arr_ptr_str_no0_load(void) TestStructTriv ar[AR_SIZE] = {}; TestArrayOfPtrToStuct obj = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} }; int idx; - uint8_t wire_sample[] = { - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x02, - 0x00, 0x00, 0x00, 0x03, - QEMU_VM_EOF - }; - save_buffer(wire_sample, sizeof(wire_sample)); + save_buffer(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)); + SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1, + wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0))); + for (idx = 0; idx < AR_SIZE; ++idx) { + /* compare the target array ar with the ground truth array ar_gt */ + g_assert_cmpint(ar_gt[idx].i, ==, 
ar[idx].i); + } +} + +static uint8_t wire_arr_ptr_0[] = { + 0x00, 0x00, 0x00, 0x00, + VMS_NULLPTR_MARKER, + 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x03, + QEMU_VM_EOF +}; + +static void test_arr_ptr_str_0_save(void) +{ + TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} }; + TestArrayOfPtrToStuct sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + + save_vmstate(&vmsd_arps, &sample); + compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); +} + +static void test_arr_ptr_str_0_load(void) +{ + TestStructTriv ar_gt[AR_SIZE] = {{.i = 0}, {.i = 0}, {.i = 2}, {.i = 3} }; + TestStructTriv ar[AR_SIZE] = {}; + TestArrayOfPtrToStuct obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + int idx; + + save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1, - wire_sample, sizeof(wire_sample))); + wire_arr_ptr_0, sizeof(wire_arr_ptr_0))); for (idx = 0; idx < AR_SIZE; ++idx) { /* compare the target array ar with the ground truth array ar_gt */ g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i); } + for (idx = 0; idx < AR_SIZE; ++idx) { + if (idx == 1) { + g_assert_cmpint((uintptr_t)(obj.ar[idx]), ==, 0); + } else { + g_assert_cmpint((uintptr_t)(obj.ar[idx]), !=, 0); + } + } +} + +typedef struct TestArrayOfPtrToInt { + int32_t *ar[AR_SIZE]; +} TestArrayOfPtrToInt; + +const VMStateDescription vmsd_arpp = { + .name = "test/arps", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_ARRAY_OF_POINTER(ar, TestArrayOfPtrToInt, + AR_SIZE, 0, vmstate_info_int32, int32_t*), + VMSTATE_END_OF_LIST() + } +}; + +static void test_arr_ptr_prim_0_save(void) +{ + int32_t ar[AR_SIZE] = {0 , 1, 2, 3}; + TestArrayOfPtrToInt sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + + save_vmstate(&vmsd_arpp, &sample); + compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); +} + +static void test_arr_ptr_prim_0_load(void) +{ + int32_t ar_gt[AR_SIZE] = {0, 1, 2, 3}; + int32_t ar[AR_SIZE] = {3 , 42, 1, 0}; + 
TestArrayOfPtrToInt obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + int idx; + + save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); + SUCCESS(load_vmstate_one(&vmsd_arpp, &obj, 1, + wire_arr_ptr_0, sizeof(wire_arr_ptr_0))); + for (idx = 0; idx < AR_SIZE; ++idx) { + /* compare the target array ar with the ground truth array ar_gt */ + if (idx == 1) { + g_assert_cmpint(42, ==, ar[idx]); + } else { + g_assert_cmpint(ar_gt[idx], ==, ar[idx]); + } + } } /* test QTAILQ migration */ @@ -781,6 +862,13 @@ int main(int argc, char **argv) test_arr_ptr_str_no0_save); g_test_add_func("/vmstate/array/ptr/str/no0/load", test_arr_ptr_str_no0_load); + g_test_add_func("/vmstate/array/ptr/str/0/save", test_arr_ptr_str_0_save); + g_test_add_func("/vmstate/array/ptr/str/0/load", + test_arr_ptr_str_0_load); + g_test_add_func("/vmstate/array/ptr/prim/0/save", + test_arr_ptr_prim_0_save); + g_test_add_func("/vmstate/array/ptr/prim/0/load", + test_arr_ptr_prim_0_load); g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q); g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q); g_test_add_func("/vmstate/tmp_struct", test_tmp_struct); |