author     Peter Maydell <peter.maydell@linaro.org>   2019-10-14 16:09:52 +0100
committer  Peter Maydell <peter.maydell@linaro.org>   2019-10-14 16:09:52 +0100
commit     c760cb77e511eb05094df67c1b30029a952efa35
tree       d57ed37e184c39091bc94f67a760e417b0fc2d3b
parent     22dbfdecc3c52228d3489da3fe81da92b21197bf
parent     9a85e4b8f672016adbf7b7d5beaab2a99b9b5615
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20191011a' into staging
Migration pull 2019-10-11
Mostly cleanups and minor fixes
[Note I'm seeing a hang on the aarch64 hosted x86-64 tcg migration
test in xbzrle; but I'm seeing that on current head as well]
# gpg: Signature made Fri 11 Oct 2019 20:14:31 BST
# gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7
* remotes/dgilbert/tags/pull-migration-20191011a: (21 commits)
migration: Support gtree migration
migration/multifd: pages->used would be cleared when attach to multifd_send_state
migration/multifd: initialize packet->magic/version once at setup stage
migration/multifd: use pages->allocated instead of the static max
migration/multifd: fix a typo in comment of multifd_recv_unfill_packet()
migration/postcopy: check PostcopyState before setting to POSTCOPY_INCOMING_RUNNING
migration/postcopy: rename postcopy_ram_enable_notify to postcopy_ram_incoming_setup
migration/postcopy: postpone setting PostcopyState to END
migration/postcopy: mis->have_listen_thread check will never be touched
migration: report SaveStateEntry id and name on failure
migration: pass in_postcopy instead of check state again
migration/postcopy: fix typo in mark_postcopy_blocktime_begin's comment
migration/postcopy: map large zero page in postcopy_ram_incoming_setup()
migration/postcopy: allocate tmp_page in setup stage
migration: Don't try and recover return path in non-postcopy
rcu: Use automatic rc_read unlock in core memory/exec code
migration: Use automatic rcu_read unlock in rdma.c
migration: Use automatic rcu_read unlock in ram.c
migration: Fix missing rcu_read_unlock
rcu: Add automatically released rcu_read_lock variants
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
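
The bulk of the cleanups convert open-coded rcu_read_lock()/rcu_read_unlock() pairs to the guard macros this series adds to include/qemu/rcu.h and documents in docs/devel/rcu.txt (see the diff below). A minimal sketch of the two usage patterns follows; MyEntry, my_entries and both functions are invented for illustration, only the headers and macros are QEMU's:

    #include "qemu/osdep.h"
    #include "qemu/rcu.h"
    #include "qemu/rcu_queue.h"
    #include "qemu/queue.h"

    /* Hypothetical RCU-protected list. */
    typedef struct MyEntry {
        uint64_t key;
        uint64_t val;
        QLIST_ENTRY(MyEntry) next;
    } MyEntry;

    static QLIST_HEAD(, MyEntry) my_entries = QLIST_HEAD_INITIALIZER(my_entries);

    static MyEntry *my_entry_lookup(uint64_t key)
    {
        MyEntry *e;

        /* Function-scoped guard: the lock is dropped on every return path. */
        RCU_READ_LOCK_GUARD();
        QLIST_FOREACH_RCU(e, &my_entries, next) {
            if (e->key == key) {
                return e;       /* no explicit rcu_read_unlock() needed */
            }
        }
        return NULL;
    }

    static uint64_t my_entry_sum(void)
    {
        uint64_t total = 0;

        /* Block-scoped guard: leaving the block (including via 'goto' or
         * 'return') releases the lock. */
        WITH_RCU_READ_LOCK_GUARD() {
            MyEntry *e;

            QLIST_FOREACH_RCU(e, &my_entries, next) {
                total += e->val;
            }
        }
        return total;
    }

The point of the conversion, visible throughout the exec.c, memory.c and migration/ hunks below, is that early-exit and error paths can simply return without having to remember a matching unlock.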
 docs/devel/rcu.txt          |  16
 exec.c                      | 116
 include/exec/ram_addr.h     | 138
 include/migration/misc.h    |   1
 include/migration/vmstate.h |  40
 include/qemu/rcu.h          |  25
 memory.c                    |  15
 migration/migration.c       |  17
 migration/postcopy-ram.c    |  88
 migration/postcopy-ram.h    |   9
 migration/ram.c             | 298
 migration/rdma.c            |  57
 migration/savevm.c          |  14
 migration/trace-events      |   5
 migration/vmstate-types.c   | 152
 tests/test-vmstate.c        | 421
 16 files changed, 980 insertions(+), 432 deletions(-)
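
The headline feature is generic GTree migration ("migration: Support gtree migration" above), implemented in migration/vmstate-types.c and exercised by tests/test-vmstate.c in the hunks below. As an illustrative sketch of how a device might describe such a field, the following uses the new VMSTATE_GTREE_DIRECT_KEY_V macro with an invented DemoDev state; every Demo* name is hypothetical, and the pre_load hook reflects the documented requirement that the destination tree already be initialized before the field is loaded:

    #include "qemu/osdep.h"
    #include "migration/vmstate.h"

    /* Hypothetical value type stored in the tree. */
    typedef struct DemoCounter {
        uint64_t hits;
    } DemoCounter;

    static const VMStateDescription vmstate_demo_counter = {
        .name = "demo-counter",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_UINT64(hits, DemoCounter),
            VMSTATE_END_OF_LIST()
        }
    };

    /* Hypothetical device state: integer ids map directly to DemoCounter. */
    typedef struct DemoDevState {
        GTree *counters;   /* key: GUINT_TO_POINTER(id), value: DemoCounter * */
    } DemoDevState;

    static gint demo_key_cmp(gconstpointer a, gconstpointer b, gpointer data)
    {
        guint ka = GPOINTER_TO_UINT(a);
        guint kb = GPOINTER_TO_UINT(b);

        return (ka > kb) - (ka < kb);
    }

    /* The destination tree must exist before the loaded nodes are inserted. */
    static int demo_pre_load(void *opaque)
    {
        DemoDevState *s = opaque;

        s->counters = g_tree_new_full(demo_key_cmp, NULL, NULL, g_free);
        return 0;
    }

    static const VMStateDescription vmstate_demo_dev = {
        .name = "demo-dev",
        .version_id = 1,
        .minimum_version_id = 1,
        .pre_load = demo_pre_load,
        .fields = (VMStateField[]) {
            VMSTATE_GTREE_DIRECT_KEY_V(counters, DemoDevState, 1,
                                       &vmstate_demo_counter, DemoCounter),
            VMSTATE_END_OF_LIST()
        }
    };

With a pointer-typed key the two-macro form VMSTATE_GTREE_V is used instead, passing a two-element array of VMStateDescriptions (value vmsd first, key vmsd second), exactly as tests/test-vmstate.c does for its interval/mapping tree.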
diff --git a/docs/devel/rcu.txt b/docs/devel/rcu.txt index c84e7f42b2..d83fed2f79 100644 --- a/docs/devel/rcu.txt +++ b/docs/devel/rcu.txt @@ -187,6 +187,22 @@ The following APIs must be used before RCU is used in a thread: Note that these APIs are relatively heavyweight, and should _not_ be nested. +Convenience macros +================== + +Two macros are provided that automatically release the read lock at the +end of the scope. + + RCU_READ_LOCK_GUARD() + + Takes the lock and will release it at the end of the block it's + used in. + + WITH_RCU_READ_LOCK_GUARD() { code } + + Is used at the head of a block to protect the code within the block. + +Note that 'goto'ing out of the guarded block will also drop the lock. DIFFERENCES WITH LINUX ====================== @@ -1037,16 +1037,14 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) return; } - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); mr = address_space_translate(as, addr, &addr, &l, false, attrs); if (!(memory_region_is_ram(mr) || memory_region_is_romd(mr))) { - rcu_read_unlock(); return; } ram_addr = memory_region_get_ram_addr(mr) + addr; tb_invalidate_phys_page_range(ram_addr, ram_addr + 1); - rcu_read_unlock(); } static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) @@ -1332,14 +1330,13 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length) end = TARGET_PAGE_ALIGN(start + length); start &= TARGET_PAGE_MASK; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); block = qemu_get_ram_block(start); assert(block == qemu_get_ram_block(end - 1)); start1 = (uintptr_t)ramblock_ptr(block, start - block->offset); CPU_FOREACH(cpu) { tlb_reset_dirty(cpu, start1, length); } - rcu_read_unlock(); } /* Note: start and end must be within the same ram block. */ @@ -1360,30 +1357,29 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; page = start >> TARGET_PAGE_BITS; - rcu_read_lock(); + WITH_RCU_READ_LOCK_GUARD() { + blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + ramblock = qemu_get_ram_block(start); + /* Range sanity check on the ramblock */ + assert(start >= ramblock->offset && + start + length <= ramblock->offset + ramblock->used_length); - blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); - ramblock = qemu_get_ram_block(start); - /* Range sanity check on the ramblock */ - assert(start >= ramblock->offset && - start + length <= ramblock->offset + ramblock->used_length); + while (page < end) { + unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; + unsigned long num = MIN(end - page, + DIRTY_MEMORY_BLOCK_SIZE - offset); - while (page < end) { - unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; - unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; - unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset); + dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx], + offset, num); + page += num; + } - dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx], - offset, num); - page += num; + mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset; + mr_size = (end - page) << TARGET_PAGE_BITS; + memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size); } - mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset; - mr_size = (end - page) << TARGET_PAGE_BITS; - memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size); - - rcu_read_unlock(); - if (dirty && tcg_enabled()) { 
tlb_reset_dirty_range_all(start, length); } @@ -1411,28 +1407,27 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty end = last >> TARGET_PAGE_BITS; dest = 0; - rcu_read_lock(); + WITH_RCU_READ_LOCK_GUARD() { + blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); - blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + while (page < end) { + unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; + unsigned long num = MIN(end - page, + DIRTY_MEMORY_BLOCK_SIZE - offset); - while (page < end) { - unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; - unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; - unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset); - - assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL))); - assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL))); - offset >>= BITS_PER_LEVEL; + assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL))); + assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL))); + offset >>= BITS_PER_LEVEL; - bitmap_copy_and_clear_atomic(snap->dirty + dest, - blocks->blocks[idx] + offset, - num); - page += num; - dest += num >> BITS_PER_LEVEL; + bitmap_copy_and_clear_atomic(snap->dirty + dest, + blocks->blocks[idx] + offset, + num); + page += num; + dest += num >> BITS_PER_LEVEL; + } } - rcu_read_unlock(); - if (tcg_enabled()) { tlb_reset_dirty_range_all(start, length); } @@ -1643,7 +1638,7 @@ void ram_block_dump(Monitor *mon) RAMBlock *block; char *psize; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); monitor_printf(mon, "%24s %8s %18s %18s %18s\n", "Block Name", "PSize", "Offset", "Used", "Total"); RAMBLOCK_FOREACH(block) { @@ -1655,7 +1650,6 @@ void ram_block_dump(Monitor *mon) (uint64_t)block->max_length); g_free(psize); } - rcu_read_unlock(); } #ifdef __linux__ @@ -2009,11 +2003,10 @@ static unsigned long last_ram_page(void) RAMBlock *block; ram_addr_t last = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); RAMBLOCK_FOREACH(block) { last = MAX(last, block->offset + block->max_length); } - rcu_read_unlock(); return last >> TARGET_PAGE_BITS; } @@ -2100,7 +2093,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev) } pstrcat(new_block->idstr, sizeof(new_block->idstr), name); - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); RAMBLOCK_FOREACH(block) { if (block != new_block && !strcmp(block->idstr, new_block->idstr)) { @@ -2109,7 +2102,6 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev) abort(); } } - rcu_read_unlock(); } /* Called with iothread lock held. 
*/ @@ -2651,17 +2643,16 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, if (xen_enabled()) { ram_addr_t ram_addr; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); ram_addr = xen_ram_addr_from_mapcache(ptr); block = qemu_get_ram_block(ram_addr); if (block) { *offset = ram_addr - block->offset; } - rcu_read_unlock(); return block; } - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); block = atomic_rcu_read(&ram_list.mru_block); if (block && block->host && host - block->host < block->max_length) { goto found; @@ -2677,7 +2668,6 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, } } - rcu_read_unlock(); return NULL; found: @@ -2685,7 +2675,6 @@ found: if (round_offset) { *offset &= TARGET_PAGE_MASK; } - rcu_read_unlock(); return block; } @@ -3281,10 +3270,9 @@ MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr, FlatView *fv; if (len > 0) { - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); fv = address_space_to_flatview(as); result = flatview_read(fv, addr, attrs, buf, len); - rcu_read_unlock(); } return result; @@ -3298,10 +3286,9 @@ MemTxResult address_space_write(AddressSpace *as, hwaddr addr, FlatView *fv; if (len > 0) { - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); fv = address_space_to_flatview(as); result = flatview_write(fv, addr, attrs, buf, len); - rcu_read_unlock(); } return result; @@ -3341,7 +3328,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, hwaddr addr1; MemoryRegion *mr; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); while (len > 0) { l = len; mr = address_space_translate(as, addr, &addr1, &l, true, attrs); @@ -3366,7 +3353,6 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, buf += l; addr += l; } - rcu_read_unlock(); return MEMTX_OK; } @@ -3511,10 +3497,9 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, FlatView *fv; bool result; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); fv = address_space_to_flatview(as); result = flatview_access_valid(fv, addr, len, is_write, attrs); - rcu_read_unlock(); return result; } @@ -3569,13 +3554,12 @@ void *address_space_map(AddressSpace *as, } l = len; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); fv = address_space_to_flatview(as); mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); if (!memory_access_is_direct(mr, is_write)) { if (atomic_xchg(&bounce.in_use, true)) { - rcu_read_unlock(); return NULL; } /* Avoid unbounded allocations */ @@ -3591,7 +3575,6 @@ void *address_space_map(AddressSpace *as, bounce.buffer, l); } - rcu_read_unlock(); *plen = l; return bounce.buffer; } @@ -3601,7 +3584,6 @@ void *address_space_map(AddressSpace *as, *plen = flatview_extend_translation(fv, addr, len, mr, xlat, l, is_write, attrs); ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true); - rcu_read_unlock(); return ptr; } @@ -3869,13 +3851,12 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr) hwaddr l = 1; bool res; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); mr = address_space_translate(&address_space_memory, phys_addr, &phys_addr, &l, false, MEMTXATTRS_UNSPECIFIED); res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr)); - rcu_read_unlock(); return res; } @@ -3884,14 +3865,13 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) RAMBlock *block; int ret = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); RAMBLOCK_FOREACH(block) { ret = func(block, opaque); if (ret) { break; } } - rcu_read_unlock(); return ret; } diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index e96e621de5..ad158bb247 100644 --- 
a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -193,30 +193,29 @@ static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; page = start >> TARGET_PAGE_BITS; - rcu_read_lock(); - - blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + WITH_RCU_READ_LOCK_GUARD() { + blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + + idx = page / DIRTY_MEMORY_BLOCK_SIZE; + offset = page % DIRTY_MEMORY_BLOCK_SIZE; + base = page - offset; + while (page < end) { + unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); + unsigned long num = next - base; + unsigned long found = find_next_bit(blocks->blocks[idx], + num, offset); + if (found < num) { + dirty = true; + break; + } - idx = page / DIRTY_MEMORY_BLOCK_SIZE; - offset = page % DIRTY_MEMORY_BLOCK_SIZE; - base = page - offset; - while (page < end) { - unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); - unsigned long num = next - base; - unsigned long found = find_next_bit(blocks->blocks[idx], num, offset); - if (found < num) { - dirty = true; - break; + page = next; + idx++; + offset = 0; + base += DIRTY_MEMORY_BLOCK_SIZE; } - - page = next; - idx++; - offset = 0; - base += DIRTY_MEMORY_BLOCK_SIZE; } - rcu_read_unlock(); - return dirty; } @@ -234,7 +233,7 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start, end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; page = start >> TARGET_PAGE_BITS; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); @@ -256,8 +255,6 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start, base += DIRTY_MEMORY_BLOCK_SIZE; } - rcu_read_unlock(); - return dirty; } @@ -309,13 +306,11 @@ static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr, idx = page / DIRTY_MEMORY_BLOCK_SIZE; offset = page % DIRTY_MEMORY_BLOCK_SIZE; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); set_bit_atomic(offset, blocks->blocks[idx]); - - rcu_read_unlock(); } static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, @@ -334,39 +329,37 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; page = start >> TARGET_PAGE_BITS; - rcu_read_lock(); + WITH_RCU_READ_LOCK_GUARD() { + for (i = 0; i < DIRTY_MEMORY_NUM; i++) { + blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]); + } - for (i = 0; i < DIRTY_MEMORY_NUM; i++) { - blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]); - } + idx = page / DIRTY_MEMORY_BLOCK_SIZE; + offset = page % DIRTY_MEMORY_BLOCK_SIZE; + base = page - offset; + while (page < end) { + unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); - idx = page / DIRTY_MEMORY_BLOCK_SIZE; - offset = page % DIRTY_MEMORY_BLOCK_SIZE; - base = page - offset; - while (page < end) { - unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); + if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) { + bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx], + offset, next - page); + } + if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) { + bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx], + offset, next - page); + } + if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) { + bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx], + offset, next - page); + } - if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) { - bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx], - 
offset, next - page); + page = next; + idx++; + offset = 0; + base += DIRTY_MEMORY_BLOCK_SIZE; } - if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) { - bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx], - offset, next - page); - } - if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) { - bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx], - offset, next - page); - } - - page = next; - idx++; - offset = 0; - base += DIRTY_MEMORY_BLOCK_SIZE; } - rcu_read_unlock(); - xen_hvm_modified_memory(start, length); } @@ -396,36 +389,35 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, offset = BIT_WORD((start >> TARGET_PAGE_BITS) % DIRTY_MEMORY_BLOCK_SIZE); - rcu_read_lock(); + WITH_RCU_READ_LOCK_GUARD() { + for (i = 0; i < DIRTY_MEMORY_NUM; i++) { + blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks; + } - for (i = 0; i < DIRTY_MEMORY_NUM; i++) { - blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks; - } + for (k = 0; k < nr; k++) { + if (bitmap[k]) { + unsigned long temp = leul_to_cpu(bitmap[k]); - for (k = 0; k < nr; k++) { - if (bitmap[k]) { - unsigned long temp = leul_to_cpu(bitmap[k]); + atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); - atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); + if (global_dirty_log) { + atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], + temp); + } - if (global_dirty_log) { - atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], - temp); + if (tcg_enabled()) { + atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], + temp); + } } - if (tcg_enabled()) { - atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp); + if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { + offset = 0; + idx++; } } - - if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { - offset = 0; - idx++; - } } - rcu_read_unlock(); - xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS); } else { uint8_t clients = tcg_enabled() ? 
DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; diff --git a/include/migration/misc.h b/include/migration/misc.h index b9d8e787af..d2762257aa 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -60,6 +60,7 @@ void migration_object_init(void); void migration_shutdown(void); void qemu_start_incoming_migration(const char *uri, Error **errp); bool migration_is_idle(void); +bool migration_is_active(MigrationState *); void add_migration_state_change_notifier(Notifier *notify); void remove_migration_state_change_notifier(Notifier *notify); bool migration_in_setup(MigrationState *); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 1fbfd099dd..b9ee563aa4 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -224,6 +224,7 @@ extern const VMStateInfo vmstate_info_unused_buffer; extern const VMStateInfo vmstate_info_tmp; extern const VMStateInfo vmstate_info_bitmap; extern const VMStateInfo vmstate_info_qtailq; +extern const VMStateInfo vmstate_info_gtree; #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) /* @@ -754,6 +755,45 @@ extern const VMStateInfo vmstate_info_qtailq; .start = offsetof(_type, _next), \ } +/* + * For migrating a GTree whose key is a pointer to _key_type and the + * value, a pointer to _val_type + * The target tree must have been properly initialized + * _vmsd: Start address of the 2 element array containing the data vmsd + * and the key vmsd, in that order + * _key_type: type of the key + * _val_type: type of the value + */ +#define VMSTATE_GTREE_V(_field, _state, _version, _vmsd, \ + _key_type, _val_type) \ +{ \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = (_vmsd), \ + .info = &vmstate_info_gtree, \ + .start = sizeof(_key_type), \ + .size = sizeof(_val_type), \ + .offset = offsetof(_state, _field), \ +} + +/* + * For migrating a GTree with direct key and the value a pointer + * to _val_type + * The target tree must have been properly initialized + * _vmsd: data vmsd + * _val_type: type of the value + */ +#define VMSTATE_GTREE_DIRECT_KEY_V(_field, _state, _version, _vmsd, _val_type) \ +{ \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = (_vmsd), \ + .info = &vmstate_info_gtree, \ + .start = 0, \ + .size = sizeof(_val_type), \ + .offset = offsetof(_state, _field), \ +} + /* _f : field name _f_n : num of elements field_name _n : num of elements diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 22876d1428..9c82683e37 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -154,6 +154,31 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); }), \ (RCUCBFunc *)g_free); +typedef void RCUReadAuto; +static inline RCUReadAuto *rcu_read_auto_lock(void) +{ + rcu_read_lock(); + /* Anything non-NULL causes the cleanup function to be called */ + return (void *)(uintptr_t)0x1; +} + +static inline void rcu_read_auto_unlock(RCUReadAuto *r) +{ + rcu_read_unlock(); +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock) + +#define WITH_RCU_READ_LOCK_GUARD() \ + WITH_RCU_READ_LOCK_GUARD_(_rcu_read_auto##__COUNTER__) + +#define WITH_RCU_READ_LOCK_GUARD_(var) \ + for (g_autoptr(RCUReadAuto) var = rcu_read_auto_lock(); \ + (var); rcu_read_auto_unlock(var), (var) = NULL) + +#define RCU_READ_LOCK_GUARD() \ + g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock() + #ifdef __cplusplus } #endif @@ -779,14 +779,13 @@ FlatView *address_space_get_flatview(AddressSpace *as) { FlatView *view; - rcu_read_lock(); + 
RCU_READ_LOCK_GUARD(); do { view = address_space_to_flatview(as); /* If somebody has replaced as->current_map concurrently, * flatview_ref returns false. */ } while (!flatview_ref(view)); - rcu_read_unlock(); return view; } @@ -2166,12 +2165,11 @@ int memory_region_get_fd(MemoryRegion *mr) { int fd; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); while (mr->alias) { mr = mr->alias; } fd = mr->ram_block->fd; - rcu_read_unlock(); return fd; } @@ -2181,14 +2179,13 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr) void *ptr; uint64_t offset = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); while (mr->alias) { offset += mr->alias_offset; mr = mr->alias; } assert(mr->ram_block); ptr = qemu_map_ram_ptr(mr->ram_block, offset); - rcu_read_unlock(); return ptr; } @@ -2578,12 +2575,11 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr, hwaddr addr, uint64_t size) { MemoryRegionSection ret; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); ret = memory_region_find_rcu(mr, addr, size); if (ret.mr) { memory_region_ref(ret.mr); } - rcu_read_unlock(); return ret; } @@ -2591,9 +2587,8 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr) { MemoryRegion *mr; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); mr = memory_region_find_rcu(container, addr, 1).mr; - rcu_read_unlock(); return mr && mr != container; } diff --git a/migration/migration.c b/migration/migration.c index 5f7e4d15e9..3febd0f8f3 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1533,8 +1533,7 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_fclose(tmp); } - assert((s->state != MIGRATION_STATUS_ACTIVE) && - (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE)); + assert(!migration_is_active(s)); if (s->state == MIGRATION_STATUS_CANCELLING) { migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, @@ -1703,6 +1702,12 @@ bool migration_is_idle(void) return false; } +bool migration_is_active(MigrationState *s) +{ + return (s->state == MIGRATION_STATUS_ACTIVE || + s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); +} + void migrate_init(MigrationState *s) { /* @@ -2481,7 +2486,7 @@ retry: out: res = qemu_file_get_error(rp); if (res) { - if (res == -EIO) { + if (res == -EIO && migration_in_postcopy()) { /* * Maybe there is something we can do: it looks like a * network down issue, and we pause for a recovery. 
@@ -3144,8 +3149,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) return MIG_ITERATE_SKIP; } /* Just another iteration step */ - qemu_savevm_state_iterate(s->to_dst_file, - s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); + qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); } else { trace_migration_thread_low_pending(pending_size); migration_completion(s); @@ -3266,8 +3270,7 @@ static void *migration_thread(void *opaque) trace_migration_thread_setup_complete(); - while (s->state == MIGRATION_STATUS_ACTIVE || - s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + while (migration_is_active(s)) { int64_t current_time; if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 1f63e65ed7..abccafc8c8 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -577,8 +577,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) } } - postcopy_state_set(POSTCOPY_INCOMING_END); - if (mis->postcopy_tmp_page) { munmap(mis->postcopy_tmp_page, mis->largest_page_size); mis->postcopy_tmp_page = NULL; @@ -768,9 +766,11 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset); atomic_xchg(&dc->vcpu_addr[cpu], addr); - /* check it here, not at the begining of the function, - * due to, check could accur early than bitmap_set in - * qemu_ufd_copy_ioctl */ + /* + * check it here, not at the beginning of the function, + * due to, check could occur early than bitmap_set in + * qemu_ufd_copy_ioctl + */ already_received = ramblock_recv_bitmap_test(rb, (void *)addr); if (already_received) { atomic_xchg(&dc->vcpu_addr[cpu], 0); @@ -1094,7 +1094,7 @@ retry: return NULL; } -int postcopy_ram_enable_notify(MigrationIncomingState *mis) +int postcopy_ram_incoming_setup(MigrationIncomingState *mis) { /* Open the fd for the kernel to give us userfaults */ mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); @@ -1134,6 +1134,32 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return -1; } + mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (mis->postcopy_tmp_page == MAP_FAILED) { + mis->postcopy_tmp_page = NULL; + error_report("%s: Failed to map postcopy_tmp_page %s", + __func__, strerror(errno)); + return -1; + } + + /* + * Map large zero page when kernel can't use UFFDIO_ZEROPAGE for hugepages + */ + mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (mis->postcopy_tmp_zero_page == MAP_FAILED) { + int e = errno; + mis->postcopy_tmp_zero_page = NULL; + error_report("%s: Failed to map large zero page %s", + __func__, strerror(e)); + return -e; + } + memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); + /* * Ballooning can mark pages as absent while we're postcopying * that would cause false userfaults. 
@@ -1240,50 +1266,10 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, qemu_ram_block_host_offset(rb, host)); } else { - /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */ - if (!mis->postcopy_tmp_zero_page) { - mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - if (mis->postcopy_tmp_zero_page == MAP_FAILED) { - int e = errno; - mis->postcopy_tmp_zero_page = NULL; - error_report("%s: %s mapping large zero page", - __func__, strerror(e)); - return -e; - } - memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); - } - return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - rb); + return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, rb); } } -/* - * Returns a target page of memory that can be mapped at a later point in time - * using postcopy_place_page - * The same address is used repeatedly, postcopy_place_page just takes the - * backing page away. - * Returns: Pointer to allocated page - * - */ -void *postcopy_get_tmp_page(MigrationIncomingState *mis) -{ - if (!mis->postcopy_tmp_page) { - mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, - PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (mis->postcopy_tmp_page == MAP_FAILED) { - mis->postcopy_tmp_page = NULL; - error_report("%s: %s", __func__, strerror(errno)); - return NULL; - } - } - - return mis->postcopy_tmp_page; -} - #else /* No target OS support, stubs just fail */ void fill_destination_postcopy_migration_info(MigrationInfo *info) @@ -1321,7 +1307,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, return -1; } -int postcopy_ram_enable_notify(MigrationIncomingState *mis) +int postcopy_ram_incoming_setup(MigrationIncomingState *mis) { assert(0); return -1; @@ -1341,12 +1327,6 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, return -1; } -void *postcopy_get_tmp_page(MigrationIncomingState *mis) -{ - assert(0); - return NULL; -} - int postcopy_wake_shared(struct PostCopyFD *pcfd, uint64_t client_addr, RAMBlock *rb) diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 9c8bd2bae0..9941feb63a 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -20,7 +20,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); * Make all of RAM sensitive to accesses to areas that haven't yet been written * and wire up anything necessary to deal with it. 
*/ -int postcopy_ram_enable_notify(MigrationIncomingState *mis); +int postcopy_ram_incoming_setup(MigrationIncomingState *mis); /* * Initialise postcopy-ram, setting the RAM to a state where we can go into @@ -100,13 +100,6 @@ typedef enum { POSTCOPY_INCOMING_END } PostcopyState; -/* - * Allocate a page of memory that can be mapped at a later point in time - * using postcopy_place_page - * Returns: Pointer to allocated page - */ -void *postcopy_get_tmp_page(MigrationIncomingState *mis); - PostcopyState postcopy_state_get(void); /* Set the state and return the old state */ PostcopyState postcopy_state_set(PostcopyState new_state); diff --git a/migration/ram.c b/migration/ram.c index 22423f08cd..5078f94490 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -181,14 +181,14 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque) RAMBlock *block; int ret = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); + RAMBLOCK_FOREACH_NOT_IGNORED(block) { ret = func(block, opaque); if (ret) { break; } } - rcu_read_unlock(); return ret; } @@ -791,13 +791,10 @@ static void multifd_pages_clear(MultiFDPages_t *pages) static void multifd_send_fill_packet(MultiFDSendParams *p) { MultiFDPacket_t *packet = p->packet; - uint32_t page_max = MULTIFD_PACKET_SIZE / qemu_target_page_size(); int i; - packet->magic = cpu_to_be32(MULTIFD_MAGIC); - packet->version = cpu_to_be32(MULTIFD_VERSION); packet->flags = cpu_to_be32(p->flags); - packet->pages_alloc = cpu_to_be32(page_max); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); packet->pages_used = cpu_to_be32(p->pages->used); packet->next_packet_size = cpu_to_be32(p->next_packet_size); packet->packet_num = cpu_to_be64(p->packet_num); @@ -838,7 +835,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) packet->pages_alloc = be32_to_cpu(packet->pages_alloc); /* - * If we recevied a packet that is 100 times bigger than expected + * If we received a packet that is 100 times bigger than expected * just stop migration. It is a magic number. */ if (packet->pages_alloc > pages_max * 100) { @@ -1132,7 +1129,6 @@ static void *multifd_send_thread(void *opaque) p->flags = 0; p->num_packets++; p->num_pages += used; - p->pages->used = 0; qemu_mutex_unlock(&p->mutex); trace_multifd_send(p->id, packet_num, used, flags, @@ -1241,6 +1237,8 @@ int multifd_save_setup(void) p->packet_len = sizeof(MultiFDPacket_t) + sizeof(ram_addr_t) * page_count; p->packet = g_malloc0(p->packet_len); + p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); + p->packet->version = cpu_to_be32(MULTIFD_VERSION); p->name = g_strdup_printf("multifdsend_%d", i); socket_send_channel_create(multifd_new_send_channel_async, p); } @@ -1848,12 +1846,12 @@ static void migration_bitmap_sync(RAMState *rs) memory_global_dirty_log_sync(); qemu_mutex_lock(&rs->bitmap_mutex); - rcu_read_lock(); - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - ramblock_sync_dirty_bitmap(rs, block); + WITH_RCU_READ_LOCK_GUARD() { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + ramblock_sync_dirty_bitmap(rs, block); + } + ram_counters.remaining = ram_bytes_remaining(); } - ram_counters.remaining = ram_bytes_remaining(); - rcu_read_unlock(); qemu_mutex_unlock(&rs->bitmap_mutex); memory_global_after_dirty_log_sync(); @@ -2397,13 +2395,12 @@ static void migration_page_queue_free(RAMState *rs) /* This queue generally should be empty - but in the case of a failed * migration might have some droppings in. 
*/ - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) { memory_region_unref(mspr->rb->mr); QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); g_free(mspr); } - rcu_read_unlock(); } /** @@ -2424,7 +2421,8 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) RAMState *rs = ram_state; ram_counters.postcopy_requests++; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); + if (!rbname) { /* Reuse last RAMBlock */ ramblock = rs->last_req_rb; @@ -2466,12 +2464,10 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req); migration_make_urgent_request(); qemu_mutex_unlock(&rs->src_page_req_mutex); - rcu_read_unlock(); return 0; err: - rcu_read_unlock(); return -1; } @@ -2700,7 +2696,8 @@ static uint64_t ram_bytes_total_common(bool count_ignored) RAMBlock *block; uint64_t total = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); + if (count_ignored) { RAMBLOCK_FOREACH_MIGRATABLE(block) { total += block->used_length; @@ -2710,7 +2707,6 @@ static uint64_t ram_bytes_total_common(bool count_ignored) total += block->used_length; } } - rcu_read_unlock(); return total; } @@ -3034,7 +3030,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) RAMBlock *block; int ret; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); /* This should be our last sync, the src is now paused */ migration_bitmap_sync(rs); @@ -3048,7 +3044,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) /* Deal with TPS != HPS and huge pages */ ret = postcopy_chunk_hostpages(ms, block); if (ret) { - rcu_read_unlock(); return ret; } @@ -3060,7 +3055,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) trace_ram_postcopy_send_discard_bitmap(); ret = postcopy_each_ram_send_discard(ms); - rcu_read_unlock(); return ret; } @@ -3081,7 +3075,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) trace_ram_discard_range(rbname, start, length); - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); RAMBlock *rb = qemu_ram_block_by_name(rbname); if (!rb) { @@ -3101,8 +3095,6 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) ret = ram_block_discard_range(rb, start, length); err: - rcu_read_unlock(); - return ret; } @@ -3231,13 +3223,12 @@ static void ram_init_bitmaps(RAMState *rs) /* For memory_global_dirty_log_start below. 
*/ qemu_mutex_lock_iothread(); qemu_mutex_lock_ramlist(); - rcu_read_lock(); - - ram_list_init_bitmaps(); - memory_global_dirty_log_start(); - migration_bitmap_sync_precopy(rs); - rcu_read_unlock(); + WITH_RCU_READ_LOCK_GUARD() { + ram_list_init_bitmaps(); + memory_global_dirty_log_start(); + migration_bitmap_sync_precopy(rs); + } qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); } @@ -3373,24 +3364,23 @@ static int ram_save_setup(QEMUFile *f, void *opaque) } (*rsp)->f = f; - rcu_read_lock(); - - qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE); + WITH_RCU_READ_LOCK_GUARD() { + qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE); - RAMBLOCK_FOREACH_MIGRATABLE(block) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); - qemu_put_be64(f, block->used_length); - if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { - qemu_put_be64(f, block->page_size); - } - if (migrate_ignore_shared()) { - qemu_put_be64(f, block->mr->addr); + RAMBLOCK_FOREACH_MIGRATABLE(block) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); + qemu_put_be64(f, block->used_length); + if (migrate_postcopy_ram() && block->page_size != + qemu_host_page_size) { + qemu_put_be64(f, block->page_size); + } + if (migrate_ignore_shared()) { + qemu_put_be64(f, block->mr->addr); + } } } - rcu_read_unlock(); - ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); @@ -3425,55 +3415,57 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) goto out; } - rcu_read_lock(); - if (ram_list.version != rs->last_version) { - ram_state_reset(rs); - } - - /* Read version before ram_list.blocks */ - smp_rmb(); + WITH_RCU_READ_LOCK_GUARD() { + if (ram_list.version != rs->last_version) { + ram_state_reset(rs); + } - ram_control_before_iterate(f, RAM_CONTROL_ROUND); + /* Read version before ram_list.blocks */ + smp_rmb(); - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - i = 0; - while ((ret = qemu_file_rate_limit(f)) == 0 || - !QSIMPLEQ_EMPTY(&rs->src_page_requests)) { - int pages; + ram_control_before_iterate(f, RAM_CONTROL_ROUND); - if (qemu_file_get_error(f)) { - break; - } + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + i = 0; + while ((ret = qemu_file_rate_limit(f)) == 0 || + !QSIMPLEQ_EMPTY(&rs->src_page_requests)) { + int pages; - pages = ram_find_and_save_block(rs, false); - /* no more pages to sent */ - if (pages == 0) { - done = 1; - break; - } + if (qemu_file_get_error(f)) { + break; + } - if (pages < 0) { - qemu_file_set_error(f, pages); - break; - } + pages = ram_find_and_save_block(rs, false); + /* no more pages to sent */ + if (pages == 0) { + done = 1; + break; + } - rs->target_page_count += pages; - - /* we want to check in the 1st loop, just in case it was the 1st time - and we had to sync the dirty bitmap. - qemu_clock_get_ns() is a bit expensive, so we only check each some - iterations - */ - if ((i & 63) == 0) { - uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; - if (t1 > MAX_WAIT) { - trace_ram_save_iterate_big_wait(t1, i); + if (pages < 0) { + qemu_file_set_error(f, pages); break; } + + rs->target_page_count += pages; + + /* + * we want to check in the 1st loop, just in case it was the 1st + * time and we had to sync the dirty bitmap. 
+ * qemu_clock_get_ns() is a bit expensive, so we only check each + * some iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / + 1000000; + if (t1 > MAX_WAIT) { + trace_ram_save_iterate_big_wait(t1, i); + break; + } + } + i++; } - i++; } - rcu_read_unlock(); /* * Must occur before EOS (or any QEMUFile operation) @@ -3511,35 +3503,33 @@ static int ram_save_complete(QEMUFile *f, void *opaque) RAMState *rs = *temp; int ret = 0; - rcu_read_lock(); - - if (!migration_in_postcopy()) { - migration_bitmap_sync_precopy(rs); - } + WITH_RCU_READ_LOCK_GUARD() { + if (!migration_in_postcopy()) { + migration_bitmap_sync_precopy(rs); + } - ram_control_before_iterate(f, RAM_CONTROL_FINISH); + ram_control_before_iterate(f, RAM_CONTROL_FINISH); - /* try transferring iterative blocks of memory */ + /* try transferring iterative blocks of memory */ - /* flush all remaining blocks regardless of rate limiting */ - while (true) { - int pages; + /* flush all remaining blocks regardless of rate limiting */ + while (true) { + int pages; - pages = ram_find_and_save_block(rs, !migration_in_colo_state()); - /* no more blocks to sent */ - if (pages == 0) { - break; - } - if (pages < 0) { - ret = pages; - break; + pages = ram_find_and_save_block(rs, !migration_in_colo_state()); + /* no more blocks to sent */ + if (pages == 0) { + break; + } + if (pages < 0) { + ret = pages; + break; + } } - } - flush_compressed_data(rs); - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - - rcu_read_unlock(); + flush_compressed_data(rs); + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } multifd_send_sync_main(rs); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); @@ -3562,9 +3552,9 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, if (!migration_in_postcopy() && remaining_size < max_size) { qemu_mutex_lock_iothread(); - rcu_read_lock(); - migration_bitmap_sync_precopy(rs); - rcu_read_unlock(); + WITH_RCU_READ_LOCK_GUARD() { + migration_bitmap_sync_precopy(rs); + } qemu_mutex_unlock_iothread(); remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } @@ -3908,7 +3898,13 @@ int colo_init_ram_cache(void) error_report("%s: Can't alloc memory for COLO cache of block %s," "size 0x" RAM_ADDR_FMT, __func__, block->idstr, block->used_length); - goto out_locked; + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + if (block->colo_cache) { + qemu_anon_ram_free(block->colo_cache, block->used_length); + block->colo_cache = NULL; + } + } + return -errno; } memcpy(block->colo_cache, block->host, block->used_length); } @@ -3934,18 +3930,6 @@ int colo_init_ram_cache(void) memory_global_dirty_log_start(); return 0; - -out_locked: - - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - if (block->colo_cache) { - qemu_anon_ram_free(block->colo_cache, block->used_length); - block->colo_cache = NULL; - } - } - - rcu_read_unlock(); - return -errno; } /* It is need to hold the global lock to call this helper */ @@ -3959,16 +3943,14 @@ void colo_release_ram_cache(void) block->bmap = NULL; } - rcu_read_lock(); - - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - if (block->colo_cache) { - qemu_anon_ram_free(block->colo_cache, block->used_length); - block->colo_cache = NULL; + WITH_RCU_READ_LOCK_GUARD() { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + if (block->colo_cache) { + qemu_anon_ram_free(block->colo_cache, block->used_length); + block->colo_cache = NULL; + } } } - - rcu_read_unlock(); qemu_mutex_destroy(&ram_state->bitmap_mutex); g_free(ram_state); ram_state = NULL; @@ -4048,7 +4030,7 @@ static int 
ram_load_postcopy(QEMUFile *f) bool matches_target_page_size = false; MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ - void *postcopy_host_page = postcopy_get_tmp_page(mis); + void *postcopy_host_page = mis->postcopy_tmp_page; void *last_host = NULL; bool all_zero = false; @@ -4206,31 +4188,30 @@ static void colo_flush_ram_cache(void) unsigned long offset = 0; memory_global_dirty_log_sync(); - rcu_read_lock(); - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - ramblock_sync_dirty_bitmap(ram_state, block); + WITH_RCU_READ_LOCK_GUARD() { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + ramblock_sync_dirty_bitmap(ram_state, block); + } } - rcu_read_unlock(); trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages); - rcu_read_lock(); - block = QLIST_FIRST_RCU(&ram_list.blocks); + WITH_RCU_READ_LOCK_GUARD() { + block = QLIST_FIRST_RCU(&ram_list.blocks); - while (block) { - offset = migration_bitmap_find_dirty(ram_state, block, offset); + while (block) { + offset = migration_bitmap_find_dirty(ram_state, block, offset); - if (offset << TARGET_PAGE_BITS >= block->used_length) { - offset = 0; - block = QLIST_NEXT_RCU(block, next); - } else { - migration_bitmap_clear_dirty(ram_state, block, offset); - dst_host = block->host + (offset << TARGET_PAGE_BITS); - src_host = block->colo_cache + (offset << TARGET_PAGE_BITS); - memcpy(dst_host, src_host, TARGET_PAGE_SIZE); + if (offset << TARGET_PAGE_BITS >= block->used_length) { + offset = 0; + block = QLIST_NEXT_RCU(block, next); + } else { + migration_bitmap_clear_dirty(ram_state, block, offset); + dst_host = block->host + (offset << TARGET_PAGE_BITS); + src_host = block->colo_cache + (offset << TARGET_PAGE_BITS); + memcpy(dst_host, src_host, TARGET_PAGE_SIZE); + } } } - - rcu_read_unlock(); trace_colo_flush_ram_cache_end(); } @@ -4429,16 +4410,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) * it will be necessary to reduce the granularity of this * critical section. 
*/ - rcu_read_lock(); + WITH_RCU_READ_LOCK_GUARD() { + if (postcopy_running) { + ret = ram_load_postcopy(f); + } else { + ret = ram_load_precopy(f); + } - if (postcopy_running) { - ret = ram_load_postcopy(f); - } else { - ret = ram_load_precopy(f); + ret |= wait_for_decompress_done(); } - - ret |= wait_for_decompress_done(); - rcu_read_unlock(); trace_ram_load_complete(ret, seq_iter); if (!ret && migration_incoming_in_colo_state()) { diff --git a/migration/rdma.c b/migration/rdma.c index 4c74e88a37..e241dcb992 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -88,7 +88,6 @@ static uint32_t known_capabilities = RDMA_CAPABILITY_PIN_ALL; " to abort!"); \ rdma->error_reported = 1; \ } \ - rcu_read_unlock(); \ return rdma->error_state; \ } \ } while (0) @@ -2678,11 +2677,10 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, size_t i; size_t len = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmaout); if (!rdma) { - rcu_read_unlock(); return -EIO; } @@ -2695,7 +2693,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, ret = qemu_rdma_write_flush(f, rdma); if (ret < 0) { rdma->error_state = ret; - rcu_read_unlock(); return ret; } @@ -2715,7 +2712,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, if (ret < 0) { rdma->error_state = ret; - rcu_read_unlock(); return ret; } @@ -2724,7 +2720,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, } } - rcu_read_unlock(); return done; } @@ -2764,11 +2759,10 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, ssize_t i; size_t done = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmain); if (!rdma) { - rcu_read_unlock(); return -EIO; } @@ -2805,7 +2799,6 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, if (ret < 0) { rdma->error_state = ret; - rcu_read_unlock(); return ret; } @@ -2819,14 +2812,12 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, /* Still didn't get enough, so lets just return */ if (want) { if (done == 0) { - rcu_read_unlock(); return QIO_CHANNEL_ERR_BLOCK; } else { break; } } } - rcu_read_unlock(); return done; } @@ -2882,7 +2873,7 @@ qio_channel_rdma_source_prepare(GSource *source, GIOCondition cond = 0; *timeout = -1; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); if (rsource->condition == G_IO_IN) { rdma = atomic_rcu_read(&rsource->rioc->rdmain); } else { @@ -2891,7 +2882,6 @@ qio_channel_rdma_source_prepare(GSource *source, if (!rdma) { error_report("RDMAContext is NULL when prepare Gsource"); - rcu_read_unlock(); return FALSE; } @@ -2900,7 +2890,6 @@ qio_channel_rdma_source_prepare(GSource *source, } cond |= G_IO_OUT; - rcu_read_unlock(); return cond & rsource->condition; } @@ -2911,7 +2900,7 @@ qio_channel_rdma_source_check(GSource *source) RDMAContext *rdma; GIOCondition cond = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); if (rsource->condition == G_IO_IN) { rdma = atomic_rcu_read(&rsource->rioc->rdmain); } else { @@ -2920,7 +2909,6 @@ qio_channel_rdma_source_check(GSource *source) if (!rdma) { error_report("RDMAContext is NULL when check Gsource"); - rcu_read_unlock(); return FALSE; } @@ -2929,7 +2917,6 @@ qio_channel_rdma_source_check(GSource *source) } cond |= G_IO_OUT; - rcu_read_unlock(); return cond & rsource->condition; } @@ -2943,7 +2930,7 @@ qio_channel_rdma_source_dispatch(GSource *source, RDMAContext *rdma; GIOCondition cond = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); if (rsource->condition == G_IO_IN) { rdma = atomic_rcu_read(&rsource->rioc->rdmain); } else { @@ -2952,7 +2939,6 @@ 
qio_channel_rdma_source_dispatch(GSource *source, if (!rdma) { error_report("RDMAContext is NULL when dispatch Gsource"); - rcu_read_unlock(); return FALSE; } @@ -2961,7 +2947,6 @@ qio_channel_rdma_source_dispatch(GSource *source, } cond |= G_IO_OUT; - rcu_read_unlock(); return (*func)(QIO_CHANNEL(rsource->rioc), (cond & rsource->condition), user_data); @@ -3073,7 +3058,7 @@ qio_channel_rdma_shutdown(QIOChannel *ioc, QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); RDMAContext *rdmain, *rdmaout; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdmain = atomic_rcu_read(&rioc->rdmain); rdmaout = atomic_rcu_read(&rioc->rdmain); @@ -3100,7 +3085,6 @@ qio_channel_rdma_shutdown(QIOChannel *ioc, break; } - rcu_read_unlock(); return 0; } @@ -3146,18 +3130,16 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, RDMAContext *rdma; int ret; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmaout); if (!rdma) { - rcu_read_unlock(); return -EIO; } CHECK_ERROR_STATE(); if (migration_in_postcopy()) { - rcu_read_unlock(); return RAM_SAVE_CONTROL_NOT_SUPP; } @@ -3242,11 +3224,9 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, } } - rcu_read_unlock(); return RAM_SAVE_CONTROL_DELAYED; err: rdma->error_state = ret; - rcu_read_unlock(); return ret; } @@ -3470,11 +3450,10 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque) int count = 0; int i = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmain); if (!rdma) { - rcu_read_unlock(); return -EIO; } @@ -3717,7 +3696,6 @@ out: if (ret < 0) { rdma->error_state = ret; } - rcu_read_unlock(); return ret; } @@ -3735,11 +3713,10 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name) int curr; int found = -1; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmain); if (!rdma) { - rcu_read_unlock(); return -EIO; } @@ -3753,7 +3730,6 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name) if (found == -1) { error_report("RAMBlock '%s' not found on destination", name); - rcu_read_unlock(); return -ENOENT; } @@ -3761,7 +3737,6 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name) trace_rdma_block_notification_handle(name, rdma->next_src_index); rdma->next_src_index++; - rcu_read_unlock(); return 0; } @@ -3786,17 +3761,15 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque); RDMAContext *rdma; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmaout); if (!rdma) { - rcu_read_unlock(); return -EIO; } CHECK_ERROR_STATE(); if (migration_in_postcopy()) { - rcu_read_unlock(); return 0; } @@ -3804,7 +3777,6 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, qemu_put_be64(f, RAM_SAVE_FLAG_HOOK); qemu_fflush(f); - rcu_read_unlock(); return 0; } @@ -3821,17 +3793,15 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, RDMAControlHeader head = { .len = 0, .repeat = 1 }; int ret = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmaout); if (!rdma) { - rcu_read_unlock(); return -EIO; } CHECK_ERROR_STATE(); if (migration_in_postcopy()) { - rcu_read_unlock(); return 0; } @@ -3863,7 +3833,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, qemu_rdma_reg_whole_ram_blocks : NULL); if (ret < 0) { ERROR(errp, "receiving remote info!"); - rcu_read_unlock(); return ret; } @@ -3887,7 +3856,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void 
*opaque,
                          "not identical on both the source and destination.",
                          local->nb_blocks, nb_dest_blocks);
             rdma->error_state = -EINVAL;
-            rcu_read_unlock();
             return -EINVAL;
         }
@@ -3904,7 +3872,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
                             local->block[i].length,
                             rdma->dest_blocks[i].length);
                 rdma->error_state = -EINVAL;
-                rcu_read_unlock();
                 return -EINVAL;
             }
             local->block[i].remote_host_addr =
@@ -3922,11 +3889,9 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
         goto err;
     }
 
-    rcu_read_unlock();
     return 0;
 err:
     rdma->error_state = ret;
-    rcu_read_unlock();
     return ret;
 }
diff --git a/migration/savevm.c b/migration/savevm.c
index bb9462a54d..8d95e261f6 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1215,6 +1215,8 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
         save_section_footer(f, se);
         if (ret < 0) {
+            error_report("failed to save SaveStateEntry with id(name): %d(%s)",
+                         se->section_id, se->idstr);
             qemu_file_set_error(f, ret);
         }
         if (ret <= 0) {
@@ -1835,6 +1837,8 @@ static void *postcopy_ram_listen_thread(void *opaque)
     rcu_unregister_thread();
     mis->have_listen_thread = false;
+    postcopy_state_set(POSTCOPY_INCOMING_END);
+
     return NULL;
 }
 
@@ -1865,7 +1869,7 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
      * shouldn't be doing anything yet so don't actually expect requests
      */
     if (migrate_postcopy_ram()) {
-        if (postcopy_ram_enable_notify(mis)) {
+        if (postcopy_ram_incoming_setup(mis)) {
             postcopy_ram_incoming_cleanup(mis);
             return -1;
         }
@@ -1876,11 +1880,6 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
         return -1;
     }
 
-    if (mis->have_listen_thread) {
-        error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
-        return -1;
-    }
-
     mis->have_listen_thread = true;
     /* Start up the listening thread and wait for it to signal ready */
     qemu_sem_init(&mis->listen_thread_sem, 0);
@@ -1934,7 +1933,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
 /* After all discards we can start running and asking for pages */
 static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
 {
-    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
+    PostcopyState ps = postcopy_state_get();
     trace_loadvm_postcopy_handle_run();
 
     if (ps != POSTCOPY_INCOMING_LISTENING) {
@@ -1942,6 +1941,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
         return -1;
     }
 
+    postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
     mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
     qemu_bh_schedule(mis->bh);
diff --git a/migration/trace-events b/migration/trace-events
index 858d415d56..6dee7b5389 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -71,6 +71,11 @@ get_qtailq_end(const char *name, const char *reason, int val) "%s %s/%d"
 put_qtailq(const char *name, int version_id) "%s v%d"
 put_qtailq_end(const char *name, const char *reason) "%s %s"
+get_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d"
+get_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d"
+put_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d"
+put_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d"
+
 # qemu-file.c
 qemu_file_fclose(void) ""
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index bee658a1b2..7236cf92bc 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -691,3 +691,155 @@ const VMStateInfo vmstate_info_qtailq = {
     .get = get_qtailq,
     .put = put_qtailq,
 };
+
+struct put_gtree_data {
+    QEMUFile *f;
+    const VMStateDescription *key_vmsd;
+    const VMStateDescription *val_vmsd;
+    QJSON *vmdesc;
+    int ret;
+};
+
+static gboolean put_gtree_elem(gpointer key, gpointer value, gpointer data)
+{
+    struct put_gtree_data *capsule = (struct put_gtree_data *)data;
+    QEMUFile *f = capsule->f;
+    int ret;
+
+    qemu_put_byte(f, true);
+
+    /* put the key */
+    if (!capsule->key_vmsd) {
+        qemu_put_be64(f, (uint64_t)(uintptr_t)(key)); /* direct key */
+    } else {
+        ret = vmstate_save_state(f, capsule->key_vmsd, key, capsule->vmdesc);
+        if (ret) {
+            capsule->ret = ret;
+            return true;
+        }
+    }
+
+    /* put the data */
+    ret = vmstate_save_state(f, capsule->val_vmsd, value, capsule->vmdesc);
+    if (ret) {
+        capsule->ret = ret;
+        return true;
+    }
+    return false;
+}
+
+static int put_gtree(QEMUFile *f, void *pv, size_t unused_size,
+                     const VMStateField *field, QJSON *vmdesc)
+{
+    bool direct_key = (!field->start);
+    const VMStateDescription *key_vmsd = direct_key ? NULL : &field->vmsd[1];
+    const VMStateDescription *val_vmsd = &field->vmsd[0];
+    const char *key_vmsd_name = direct_key ? "direct" : key_vmsd->name;
+    struct put_gtree_data capsule = {
+        .f = f,
+        .key_vmsd = key_vmsd,
+        .val_vmsd = val_vmsd,
+        .vmdesc = vmdesc,
+        .ret = 0};
+    GTree **pval = pv;
+    GTree *tree = *pval;
+    uint32_t nnodes = g_tree_nnodes(tree);
+    int ret;
+
+    trace_put_gtree(field->name, key_vmsd_name, val_vmsd->name, nnodes);
+    qemu_put_be32(f, nnodes);
+    g_tree_foreach(tree, put_gtree_elem, (gpointer)&capsule);
+    qemu_put_byte(f, false);
+    ret = capsule.ret;
+    if (ret) {
+        error_report("%s : failed to save gtree (%d)", field->name, ret);
+    }
+    trace_put_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+    return ret;
+}
+
+static int get_gtree(QEMUFile *f, void *pv, size_t unused_size,
+                     const VMStateField *field)
+{
+    bool direct_key = (!field->start);
+    const VMStateDescription *key_vmsd = direct_key ? NULL : &field->vmsd[1];
+    const VMStateDescription *val_vmsd = &field->vmsd[0];
+    const char *key_vmsd_name = direct_key ? "direct" : key_vmsd->name;
+    int version_id = field->version_id;
+    size_t key_size = field->start;
+    size_t val_size = field->size;
+    int nnodes, count = 0;
+    GTree **pval = pv;
+    GTree *tree = *pval;
+    void *key, *val;
+    int ret = 0;
+
+    /* in case of direct key, the key vmsd can be {}, ie. check fields */
+    if (!direct_key && version_id > key_vmsd->version_id) {
+        error_report("%s %s", key_vmsd->name, "too new");
+        return -EINVAL;
+    }
+    if (!direct_key && version_id < key_vmsd->minimum_version_id) {
+        error_report("%s %s", key_vmsd->name, "too old");
+        return -EINVAL;
+    }
+    if (version_id > val_vmsd->version_id) {
+        error_report("%s %s", val_vmsd->name, "too new");
+        return -EINVAL;
+    }
+    if (version_id < val_vmsd->minimum_version_id) {
+        error_report("%s %s", val_vmsd->name, "too old");
+        return -EINVAL;
+    }
+
+    nnodes = qemu_get_be32(f);
+    trace_get_gtree(field->name, key_vmsd_name, val_vmsd->name, nnodes);
+
+    while (qemu_get_byte(f)) {
+        if ((++count) > nnodes) {
+            ret = -EINVAL;
+            break;
+        }
+        if (direct_key) {
+            key = (void *)(uintptr_t)qemu_get_be64(f);
+        } else {
+            key = g_malloc0(key_size);
+            ret = vmstate_load_state(f, key_vmsd, key, version_id);
+            if (ret) {
+                error_report("%s : failed to load %s (%d)",
+                             field->name, key_vmsd->name, ret);
+                goto key_error;
+            }
+        }
+        val = g_malloc0(val_size);
+        ret = vmstate_load_state(f, val_vmsd, val, version_id);
+        if (ret) {
+            error_report("%s : failed to load %s (%d)",
+                         field->name, val_vmsd->name, ret);
+            goto val_error;
+        }
+        g_tree_insert(tree, key, val);
+    }
+    if (count != nnodes) {
+        error_report("%s inconsistent stream when loading the gtree",
+                     field->name);
+        return -EINVAL;
+    }
+    trace_get_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+    return ret;
+val_error:
+    g_free(val);
+key_error:
+    if (!direct_key) {
+        g_free(key);
+    }
+    trace_get_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+    return ret;
+}
+
+
+const VMStateInfo vmstate_info_gtree = {
+    .name = "gtree",
+    .get = get_gtree,
+    .put = put_gtree,
+};
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index e80c4c6143..1e5be1d4ff 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -812,6 +812,423 @@ static void test_load_q(void)
     qemu_fclose(fload);
 }
+/* interval (key) */
+typedef struct TestGTreeInterval {
+    uint64_t low;
+    uint64_t high;
+} TestGTreeInterval;
+
+#define VMSTATE_INTERVAL                          \
+{                                                 \
+    .name = "interval",                           \
+    .version_id = 1,                              \
+    .minimum_version_id = 1,                      \
+    .fields = (VMStateField[]) {                  \
+        VMSTATE_UINT64(low, TestGTreeInterval),   \
+        VMSTATE_UINT64(high, TestGTreeInterval),  \
+        VMSTATE_END_OF_LIST()                     \
+    }                                             \
+}
+
+/* mapping (value) */
+typedef struct TestGTreeMapping {
+    uint64_t phys_addr;
+    uint32_t flags;
+} TestGTreeMapping;
+
+#define VMSTATE_MAPPING                               \
+{                                                     \
+    .name = "mapping",                                \
+    .version_id = 1,                                  \
+    .minimum_version_id = 1,                          \
+    .fields = (VMStateField[]) {                      \
+        VMSTATE_UINT64(phys_addr, TestGTreeMapping),  \
+        VMSTATE_UINT32(flags, TestGTreeMapping),      \
+        VMSTATE_END_OF_LIST()                         \
+    },                                                \
+}
+
+static const VMStateDescription vmstate_interval_mapping[2] = {
+    VMSTATE_MAPPING,   /* value */
+    VMSTATE_INTERVAL   /* key   */
+};
+
+typedef struct TestGTreeDomain {
+    int32_t id;
+    GTree *mappings;
+} TestGTreeDomain;
+
+typedef struct TestGTreeIOMMU {
+    int32_t id;
+    GTree *domains;
+} TestGTreeIOMMU;
+
+/* Interval comparison function */
+static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    TestGTreeInterval *inta = (TestGTreeInterval *)a;
+    TestGTreeInterval *intb = (TestGTreeInterval *)b;
+
+    if (inta->high < intb->low) {
+        return -1;
+    } else if (intb->high < inta->low) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/* ID comparison function */
+static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    uint ua = GPOINTER_TO_UINT(a);
+    uint ub = GPOINTER_TO_UINT(b);
+    return (ua > ub) - (ua < ub);
+}
+
+static void destroy_domain(gpointer data)
+{
+    TestGTreeDomain *domain = (TestGTreeDomain *)data;
+
+    g_tree_destroy(domain->mappings);
+    g_free(domain);
+}
+
+static int domain_preload(void *opaque)
+{
+    TestGTreeDomain *domain = opaque;
+
+    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                       NULL, g_free, g_free);
+    return 0;
+}
+
+static int iommu_preload(void *opaque)
+{
+    TestGTreeIOMMU *iommu = opaque;
+
+    iommu->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
+                                     NULL, NULL, destroy_domain);
+    return 0;
+}
+
+static const VMStateDescription vmstate_domain = {
+    .name = "domain",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = domain_preload,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(id, TestGTreeDomain),
+        VMSTATE_GTREE_V(mappings, TestGTreeDomain, 1,
+                        vmstate_interval_mapping,
+                        TestGTreeInterval, TestGTreeMapping),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_iommu = {
+    .name = "iommu",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = iommu_preload,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(id, TestGTreeIOMMU),
+        VMSTATE_GTREE_DIRECT_KEY_V(domains, TestGTreeIOMMU, 1,
+                                   &vmstate_domain, TestGTreeDomain),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+uint8_t first_domain_dump[] = {
+    /* id */
+    0x00, 0x0, 0x0, 0x6,
+    0x00, 0x0, 0x0, 0x2, /* 2 mappings */
+    0x1, /* start of a */
+    /* a */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0xFF,
+    /* map_a */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00,
+    0x00, 0x00, 0x00, 0x01,
+    0x1, /* start of b */
+    /* b */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0xFF,
+    /* map_b */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x02,
+    0x0, /* end of gtree */
+    QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static TestGTreeDomain *create_first_domain(void)
+{
+    TestGTreeDomain *domain;
+    TestGTreeMapping *map_a, *map_b;
+    TestGTreeInterval *a, *b;
+
+    domain = g_malloc0(sizeof(TestGTreeDomain));
+    domain->id = 6;
+
+    a = g_malloc0(sizeof(TestGTreeInterval));
+    a->low = 0x1000;
+    a->high = 0x1FFF;
+
+    b = g_malloc0(sizeof(TestGTreeInterval));
+    b->low = 0x4000;
+    b->high = 0x4FFF;
+
+    map_a = g_malloc0(sizeof(TestGTreeMapping));
+    map_a->phys_addr = 0xa000;
+    map_a->flags = 1;
+
+    map_b = g_malloc0(sizeof(TestGTreeMapping));
+    map_b->phys_addr = 0xe0000;
+    map_b->flags = 2;
+
+    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, NULL,
+                                       (GDestroyNotify)g_free,
+                                       (GDestroyNotify)g_free);
+    g_tree_insert(domain->mappings, a, map_a);
+    g_tree_insert(domain->mappings, b, map_b);
+    return domain;
+}
+
+static void test_gtree_save_domain(void)
+{
+    TestGTreeDomain *first_domain = create_first_domain();
+
+    save_vmstate(&vmstate_domain, first_domain);
+    compare_vmstate(first_domain_dump, sizeof(first_domain_dump));
+    destroy_domain(first_domain);
+}
+
+struct match_node_data {
+    GTree *tree;
+    gpointer key;
+    gpointer value;
+};
+
+struct tree_cmp_data {
+    GTree *tree1;
+    GTree *tree2;
+    GTraverseFunc match_node;
+};
+
+static gboolean match_interval_mapping_node(gpointer key,
+                                            gpointer value, gpointer data)
+{
+    TestGTreeMapping *map_a, *map_b;
+    TestGTreeInterval *a, *b;
+    struct match_node_data *d = (struct match_node_data *)data;
+    char *str = g_strdup_printf("dest");
+
+    g_free(str);
+    a = (TestGTreeInterval *)key;
+    b = (TestGTreeInterval *)d->key;
+
+    map_a = (TestGTreeMapping *)value;
+    map_b = (TestGTreeMapping *)d->value;
+
+    assert(a->low == b->low);
+    assert(a->high == b->high);
+    assert(map_a->phys_addr == map_b->phys_addr);
+    assert(map_a->flags == map_b->flags);
+    g_tree_remove(d->tree, key);
+    return true;
+}
+
+static gboolean diff_tree(gpointer key, gpointer value, gpointer data)
+{
+    struct tree_cmp_data *tp = (struct tree_cmp_data *)data;
+    struct match_node_data d = {tp->tree2, key, value};
+
+    g_tree_foreach(tp->tree2, tp->match_node, &d);
+    g_tree_remove(tp->tree1, key);
+    return false;
+}
+
+static void compare_trees(GTree *tree1, GTree *tree2,
+                          GTraverseFunc function)
+{
+    struct tree_cmp_data tp = {tree1, tree2, function};
+
+    g_tree_foreach(tree1, diff_tree, &tp);
+    assert(g_tree_nnodes(tree1) == 0);
+    assert(g_tree_nnodes(tree2) == 0);
+}
+
+static void diff_domain(TestGTreeDomain *d1, TestGTreeDomain *d2)
+{
+    assert(d1->id == d2->id);
+    compare_trees(d1->mappings, d2->mappings, match_interval_mapping_node);
+}
+
+static gboolean match_domain_node(gpointer key, gpointer value, gpointer data)
+{
+    uint64_t id1, id2;
+    TestGTreeDomain *d1, *d2;
+    struct match_node_data *d = (struct match_node_data *)data;
+
+    id1 = (uint64_t)(uintptr_t)key;
+    id2 = (uint64_t)(uintptr_t)d->key;
+    d1 = (TestGTreeDomain *)value;
+    d2 = (TestGTreeDomain *)d->value;
+    assert(id1 == id2);
+    diff_domain(d1, d2);
+    g_tree_remove(d->tree, key);
+    return true;
+}
+
+static void diff_iommu(TestGTreeIOMMU *iommu1, TestGTreeIOMMU *iommu2)
+{
+    assert(iommu1->id == iommu2->id);
+    compare_trees(iommu1->domains, iommu2->domains, match_domain_node);
+}
+
+static void test_gtree_load_domain(void)
+{
+    TestGTreeDomain *dest_domain = g_malloc0(sizeof(TestGTreeDomain));
+    TestGTreeDomain *orig_domain = create_first_domain();
+    QEMUFile *fload, *fsave;
+    char eof;
+
+    fsave = open_test_file(true);
+    qemu_put_buffer(fsave, first_domain_dump, sizeof(first_domain_dump));
+    g_assert(!qemu_file_get_error(fsave));
+    qemu_fclose(fsave);
+
+    fload = open_test_file(false);
+
+    vmstate_load_state(fload, &vmstate_domain, dest_domain, 1);
+    eof = qemu_get_byte(fload);
+    g_assert(!qemu_file_get_error(fload));
+    g_assert_cmpint(orig_domain->id, ==, dest_domain->id);
+    g_assert_cmpint(eof, ==, QEMU_VM_EOF);
+
+    diff_domain(orig_domain, dest_domain);
+    destroy_domain(orig_domain);
+    destroy_domain(dest_domain);
+    qemu_fclose(fload);
+}
+
+uint8_t iommu_dump[] = {
+    /* iommu id */
+    0x00, 0x0, 0x0, 0x7,
+    0x00, 0x0, 0x0, 0x2, /* 2 domains */
+    0x1,/* start of domain 5 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0x0, 0x5, /* key = 5 */
+    0x00, 0x0, 0x0, 0x5, /* domain1 id */
+    0x00, 0x0, 0x0, 0x1, /* 1 mapping */
+    0x1, /* start of mappings */
+    /* c */
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0xFF, 0xFF, 0xFF,
+    /* map_c */
+    0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00,
+    0x00, 0x0, 0x0, 0x3,
+    0x0, /* end of domain1 mappings*/
+    0x1,/* start of domain 6 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0x0, 0x6, /* key = 6 */
+    0x00, 0x0, 0x0, 0x6, /* domain6 id */
+    0x00, 0x0, 0x0, 0x2, /* 2 mappings */
+    0x1, /* start of a */
+    /* a */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0xFF,
+    /* map_a */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00,
+    0x00, 0x00, 0x00, 0x01,
+    0x1, /* start of b */
+    /* b */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0xFF,
+    /* map_b */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x02,
+    0x0, /* end of domain6 mappings*/
+    0x0, /* end of domains */
+    QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static TestGTreeIOMMU *create_iommu(void)
+{
+    TestGTreeIOMMU *iommu = g_malloc0(sizeof(TestGTreeIOMMU));
+    TestGTreeDomain *first_domain = create_first_domain();
+    TestGTreeDomain *second_domain;
+    TestGTreeMapping *map_c;
+    TestGTreeInterval *c;
+
+    iommu->id = 7;
+    iommu->domains = g_tree_new_full((GCompareDataFunc)int_cmp, NULL,
+                                     NULL,
+                                     destroy_domain);
+
+    second_domain = g_malloc0(sizeof(TestGTreeDomain));
+    second_domain->id = 5;
+    second_domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                              NULL,
+                                              (GDestroyNotify)g_free,
+                                              (GDestroyNotify)g_free);
+
+    g_tree_insert(iommu->domains, GUINT_TO_POINTER(6), first_domain);
+    g_tree_insert(iommu->domains, (gpointer)0x0000000000000005, second_domain);
+
+    c = g_malloc0(sizeof(TestGTreeInterval));
+    c->low = 0x1000000;
+    c->high = 0x1FFFFFF;
+
+    map_c = g_malloc0(sizeof(TestGTreeMapping));
+    map_c->phys_addr = 0xF000000;
+    map_c->flags = 0x3;
+
+    g_tree_insert(second_domain->mappings, c, map_c);
+    return iommu;
+}
+
+static void destroy_iommu(TestGTreeIOMMU *iommu)
+{
+    g_tree_destroy(iommu->domains);
+    g_free(iommu);
+}
+
+static void test_gtree_save_iommu(void)
+{
+    TestGTreeIOMMU *iommu = create_iommu();
+
+    save_vmstate(&vmstate_iommu, iommu);
+    compare_vmstate(iommu_dump, sizeof(iommu_dump));
+    destroy_iommu(iommu);
+}
+
+static void test_gtree_load_iommu(void)
+{
+    TestGTreeIOMMU *dest_iommu = g_malloc0(sizeof(TestGTreeIOMMU));
+    TestGTreeIOMMU *orig_iommu = create_iommu();
+    QEMUFile *fsave, *fload;
+    char eof;
+    int ret;
+
+    fsave = open_test_file(true);
+    qemu_put_buffer(fsave, iommu_dump, sizeof(iommu_dump));
+    g_assert(!qemu_file_get_error(fsave));
+    qemu_fclose(fsave);
+
+    fload = open_test_file(false);
+    vmstate_load_state(fload, &vmstate_iommu, dest_iommu, 1);
+    ret = qemu_file_get_error(fload);
+    eof = qemu_get_byte(fload);
+    ret = qemu_file_get_error(fload);
+    g_assert(!ret);
+    g_assert_cmpint(orig_iommu->id, ==, dest_iommu->id);
+    g_assert_cmpint(eof, ==, QEMU_VM_EOF);
+
+    diff_iommu(orig_iommu, dest_iommu);
+    destroy_iommu(orig_iommu);
+    destroy_iommu(dest_iommu);
+    qemu_fclose(fload);
+}
+
 typedef struct TmpTestStruct {
     TestStruct *parent;
     int64_t diff;
@@ -932,6 +1349,10 @@ int main(int argc, char **argv)
                        test_arr_ptr_prim_0_load);
     g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
     g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
+    g_test_add_func("/vmstate/gtree/save/savedomain", test_gtree_save_domain);
+    g_test_add_func("/vmstate/gtree/load/loaddomain", test_gtree_load_domain);
+    g_test_add_func("/vmstate/gtree/save/saveiommu", test_gtree_save_iommu);
+    g_test_add_func("/vmstate/gtree/load/loadiommu", test_gtree_load_iommu);
     g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
     g_test_run();
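Usage note (not part of the diff above): the new gtree VMStateInfo is driven through the VMSTATE_GTREE_V()/VMSTATE_GTREE_DIRECT_KEY_V() field macros, exactly as tests/test-vmstate.c exercises them. As a rough sketch of how a device might migrate a directly-keyed GTree once this series is applied, something along the following lines should work; MyEntry, MyCache and all related names are hypothetical and exist nowhere in QEMU, and the tree must be allocated in a pre_load hook because get_gtree() only inserts into an existing tree.

/*
 * Illustrative sketch only -- MyEntry, MyCache and vmstate_mycache are
 * hypothetical names, not part of this series.  Assumes the usual
 * "qemu/osdep.h" and "migration/vmstate.h" includes; the macro and
 * callback usage mirrors tests/test-vmstate.c above.
 */
typedef struct MyEntry {
    uint64_t phys;
    uint32_t flags;
} MyEntry;

typedef struct MyCache {
    int32_t id;
    GTree *entries;     /* keyed directly by a small integer */
} MyCache;

static const VMStateDescription vmstate_myentry = {
    .name = "myentry",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(phys, MyEntry),
        VMSTATE_UINT32(flags, MyEntry),
        VMSTATE_END_OF_LIST()
    }
};

/* total order on the direct (pointer-encoded) integer keys */
static gint mycache_key_cmp(gconstpointer a, gconstpointer b, gpointer data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);

    return (ua > ub) - (ua < ub);
}

/* get_gtree() only fills an existing tree, so allocate it before loading */
static int mycache_pre_load(void *opaque)
{
    MyCache *c = opaque;

    c->entries = g_tree_new_full(mycache_key_cmp, NULL, NULL, g_free);
    return 0;
}

static const VMStateDescription vmstate_mycache = {
    .name = "mycache",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = mycache_pre_load,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(id, MyCache),
        VMSTATE_GTREE_DIRECT_KEY_V(entries, MyCache, 1,
                                   &vmstate_myentry, MyEntry),
        VMSTATE_END_OF_LIST()
    }
};

For non-trivial keys, the VMSTATE_GTREE_V() form used by vmstate_domain in the tests applies instead: it takes a two-element VMStateDescription array with the value description first and the key description second. In both forms, values (and non-direct keys) loaded by get_gtree() are handed to the tree, so the destroy notifiers passed to g_tree_new_full() own them from then on, as iommu_preload() above illustrates.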