diff options
Diffstat (limited to 'migration')
-rw-r--r-- | migration/migration.c | 17 | ||||
-rw-r--r-- | migration/postcopy-ram.c | 88 | ||||
-rw-r--r-- | migration/postcopy-ram.h | 9 | ||||
-rw-r--r-- | migration/ram.c | 298 | ||||
-rw-r--r-- | migration/rdma.c | 57 | ||||
-rw-r--r-- | migration/savevm.c | 14 | ||||
-rw-r--r-- | migration/trace-events | 5 | ||||
-rw-r--r-- | migration/vmstate-types.c | 152 |
8 files changed, 359 insertions, 281 deletions
diff --git a/migration/migration.c b/migration/migration.c index 5f7e4d15e9..3febd0f8f3 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1533,8 +1533,7 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_fclose(tmp); } - assert((s->state != MIGRATION_STATUS_ACTIVE) && - (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE)); + assert(!migration_is_active(s)); if (s->state == MIGRATION_STATUS_CANCELLING) { migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, @@ -1703,6 +1702,12 @@ bool migration_is_idle(void) return false; } +bool migration_is_active(MigrationState *s) +{ + return (s->state == MIGRATION_STATUS_ACTIVE || + s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); +} + void migrate_init(MigrationState *s) { /* @@ -2481,7 +2486,7 @@ retry: out: res = qemu_file_get_error(rp); if (res) { - if (res == -EIO) { + if (res == -EIO && migration_in_postcopy()) { /* * Maybe there is something we can do: it looks like a * network down issue, and we pause for a recovery. @@ -3144,8 +3149,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) return MIG_ITERATE_SKIP; } /* Just another iteration step */ - qemu_savevm_state_iterate(s->to_dst_file, - s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); + qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); } else { trace_migration_thread_low_pending(pending_size); migration_completion(s); @@ -3266,8 +3270,7 @@ static void *migration_thread(void *opaque) trace_migration_thread_setup_complete(); - while (s->state == MIGRATION_STATUS_ACTIVE || - s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + while (migration_is_active(s)) { int64_t current_time; if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 1f63e65ed7..abccafc8c8 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -577,8 +577,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) } } - postcopy_state_set(POSTCOPY_INCOMING_END); - if (mis->postcopy_tmp_page) { munmap(mis->postcopy_tmp_page, mis->largest_page_size); mis->postcopy_tmp_page = NULL; @@ -768,9 +766,11 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset); atomic_xchg(&dc->vcpu_addr[cpu], addr); - /* check it here, not at the begining of the function, - * due to, check could accur early than bitmap_set in - * qemu_ufd_copy_ioctl */ + /* + * check it here, not at the beginning of the function, + * due to, check could occur early than bitmap_set in + * qemu_ufd_copy_ioctl + */ already_received = ramblock_recv_bitmap_test(rb, (void *)addr); if (already_received) { atomic_xchg(&dc->vcpu_addr[cpu], 0); @@ -1094,7 +1094,7 @@ retry: return NULL; } -int postcopy_ram_enable_notify(MigrationIncomingState *mis) +int postcopy_ram_incoming_setup(MigrationIncomingState *mis) { /* Open the fd for the kernel to give us userfaults */ mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); @@ -1134,6 +1134,32 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return -1; } + mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (mis->postcopy_tmp_page == MAP_FAILED) { + mis->postcopy_tmp_page = NULL; + error_report("%s: Failed to map postcopy_tmp_page %s", + __func__, strerror(errno)); + return -1; + } + + /* + * Map large zero page when kernel can't use UFFDIO_ZEROPAGE for hugepages + */ + mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (mis->postcopy_tmp_zero_page == MAP_FAILED) { + int e = errno; + mis->postcopy_tmp_zero_page = NULL; + error_report("%s: Failed to map large zero page %s", + __func__, strerror(e)); + return -e; + } + memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); + /* * Ballooning can mark pages as absent while we're postcopying * that would cause false userfaults. @@ -1240,50 +1266,10 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, qemu_ram_block_host_offset(rb, host)); } else { - /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */ - if (!mis->postcopy_tmp_zero_page) { - mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - if (mis->postcopy_tmp_zero_page == MAP_FAILED) { - int e = errno; - mis->postcopy_tmp_zero_page = NULL; - error_report("%s: %s mapping large zero page", - __func__, strerror(e)); - return -e; - } - memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); - } - return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - rb); + return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, rb); } } -/* - * Returns a target page of memory that can be mapped at a later point in time - * using postcopy_place_page - * The same address is used repeatedly, postcopy_place_page just takes the - * backing page away. - * Returns: Pointer to allocated page - * - */ -void *postcopy_get_tmp_page(MigrationIncomingState *mis) -{ - if (!mis->postcopy_tmp_page) { - mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, - PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (mis->postcopy_tmp_page == MAP_FAILED) { - mis->postcopy_tmp_page = NULL; - error_report("%s: %s", __func__, strerror(errno)); - return NULL; - } - } - - return mis->postcopy_tmp_page; -} - #else /* No target OS support, stubs just fail */ void fill_destination_postcopy_migration_info(MigrationInfo *info) @@ -1321,7 +1307,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, return -1; } -int postcopy_ram_enable_notify(MigrationIncomingState *mis) +int postcopy_ram_incoming_setup(MigrationIncomingState *mis) { assert(0); return -1; @@ -1341,12 +1327,6 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, return -1; } -void *postcopy_get_tmp_page(MigrationIncomingState *mis) -{ - assert(0); - return NULL; -} - int postcopy_wake_shared(struct PostCopyFD *pcfd, uint64_t client_addr, RAMBlock *rb) diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 9c8bd2bae0..9941feb63a 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -20,7 +20,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); * Make all of RAM sensitive to accesses to areas that haven't yet been written * and wire up anything necessary to deal with it. */ -int postcopy_ram_enable_notify(MigrationIncomingState *mis); +int postcopy_ram_incoming_setup(MigrationIncomingState *mis); /* * Initialise postcopy-ram, setting the RAM to a state where we can go into @@ -100,13 +100,6 @@ typedef enum { POSTCOPY_INCOMING_END } PostcopyState; -/* - * Allocate a page of memory that can be mapped at a later point in time - * using postcopy_place_page - * Returns: Pointer to allocated page - */ -void *postcopy_get_tmp_page(MigrationIncomingState *mis); - PostcopyState postcopy_state_get(void); /* Set the state and return the old state */ PostcopyState postcopy_state_set(PostcopyState new_state); diff --git a/migration/ram.c b/migration/ram.c index 22423f08cd..5078f94490 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -181,14 +181,14 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque) RAMBlock *block; int ret = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); + RAMBLOCK_FOREACH_NOT_IGNORED(block) { ret = func(block, opaque); if (ret) { break; } } - rcu_read_unlock(); return ret; } @@ -791,13 +791,10 @@ static void multifd_pages_clear(MultiFDPages_t *pages) static void multifd_send_fill_packet(MultiFDSendParams *p) { MultiFDPacket_t *packet = p->packet; - uint32_t page_max = MULTIFD_PACKET_SIZE / qemu_target_page_size(); int i; - packet->magic = cpu_to_be32(MULTIFD_MAGIC); - packet->version = cpu_to_be32(MULTIFD_VERSION); packet->flags = cpu_to_be32(p->flags); - packet->pages_alloc = cpu_to_be32(page_max); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); packet->pages_used = cpu_to_be32(p->pages->used); packet->next_packet_size = cpu_to_be32(p->next_packet_size); packet->packet_num = cpu_to_be64(p->packet_num); @@ -838,7 +835,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) packet->pages_alloc = be32_to_cpu(packet->pages_alloc); /* - * If we recevied a packet that is 100 times bigger than expected + * If we received a packet that is 100 times bigger than expected * just stop migration. It is a magic number. */ if (packet->pages_alloc > pages_max * 100) { @@ -1132,7 +1129,6 @@ static void *multifd_send_thread(void *opaque) p->flags = 0; p->num_packets++; p->num_pages += used; - p->pages->used = 0; qemu_mutex_unlock(&p->mutex); trace_multifd_send(p->id, packet_num, used, flags, @@ -1241,6 +1237,8 @@ int multifd_save_setup(void) p->packet_len = sizeof(MultiFDPacket_t) + sizeof(ram_addr_t) * page_count; p->packet = g_malloc0(p->packet_len); + p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); + p->packet->version = cpu_to_be32(MULTIFD_VERSION); p->name = g_strdup_printf("multifdsend_%d", i); socket_send_channel_create(multifd_new_send_channel_async, p); } @@ -1848,12 +1846,12 @@ static void migration_bitmap_sync(RAMState *rs) memory_global_dirty_log_sync(); qemu_mutex_lock(&rs->bitmap_mutex); - rcu_read_lock(); - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - ramblock_sync_dirty_bitmap(rs, block); + WITH_RCU_READ_LOCK_GUARD() { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + ramblock_sync_dirty_bitmap(rs, block); + } + ram_counters.remaining = ram_bytes_remaining(); } - ram_counters.remaining = ram_bytes_remaining(); - rcu_read_unlock(); qemu_mutex_unlock(&rs->bitmap_mutex); memory_global_after_dirty_log_sync(); @@ -2397,13 +2395,12 @@ static void migration_page_queue_free(RAMState *rs) /* This queue generally should be empty - but in the case of a failed * migration might have some droppings in. */ - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) { memory_region_unref(mspr->rb->mr); QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); g_free(mspr); } - rcu_read_unlock(); } /** @@ -2424,7 +2421,8 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) RAMState *rs = ram_state; ram_counters.postcopy_requests++; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); + if (!rbname) { /* Reuse last RAMBlock */ ramblock = rs->last_req_rb; @@ -2466,12 +2464,10 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req); migration_make_urgent_request(); qemu_mutex_unlock(&rs->src_page_req_mutex); - rcu_read_unlock(); return 0; err: - rcu_read_unlock(); return -1; } @@ -2700,7 +2696,8 @@ static uint64_t ram_bytes_total_common(bool count_ignored) RAMBlock *block; uint64_t total = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); + if (count_ignored) { RAMBLOCK_FOREACH_MIGRATABLE(block) { total += block->used_length; @@ -2710,7 +2707,6 @@ static uint64_t ram_bytes_total_common(bool count_ignored) total += block->used_length; } } - rcu_read_unlock(); return total; } @@ -3034,7 +3030,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) RAMBlock *block; int ret; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); /* This should be our last sync, the src is now paused */ migration_bitmap_sync(rs); @@ -3048,7 +3044,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) /* Deal with TPS != HPS and huge pages */ ret = postcopy_chunk_hostpages(ms, block); if (ret) { - rcu_read_unlock(); return ret; } @@ -3060,7 +3055,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) trace_ram_postcopy_send_discard_bitmap(); ret = postcopy_each_ram_send_discard(ms); - rcu_read_unlock(); return ret; } @@ -3081,7 +3075,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) trace_ram_discard_range(rbname, start, length); - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); RAMBlock *rb = qemu_ram_block_by_name(rbname); if (!rb) { @@ -3101,8 +3095,6 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) ret = ram_block_discard_range(rb, start, length); err: - rcu_read_unlock(); - return ret; } @@ -3231,13 +3223,12 @@ static void ram_init_bitmaps(RAMState *rs) /* For memory_global_dirty_log_start below. */ qemu_mutex_lock_iothread(); qemu_mutex_lock_ramlist(); - rcu_read_lock(); - - ram_list_init_bitmaps(); - memory_global_dirty_log_start(); - migration_bitmap_sync_precopy(rs); - rcu_read_unlock(); + WITH_RCU_READ_LOCK_GUARD() { + ram_list_init_bitmaps(); + memory_global_dirty_log_start(); + migration_bitmap_sync_precopy(rs); + } qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); } @@ -3373,24 +3364,23 @@ static int ram_save_setup(QEMUFile *f, void *opaque) } (*rsp)->f = f; - rcu_read_lock(); - - qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE); + WITH_RCU_READ_LOCK_GUARD() { + qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE); - RAMBLOCK_FOREACH_MIGRATABLE(block) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); - qemu_put_be64(f, block->used_length); - if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { - qemu_put_be64(f, block->page_size); - } - if (migrate_ignore_shared()) { - qemu_put_be64(f, block->mr->addr); + RAMBLOCK_FOREACH_MIGRATABLE(block) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); + qemu_put_be64(f, block->used_length); + if (migrate_postcopy_ram() && block->page_size != + qemu_host_page_size) { + qemu_put_be64(f, block->page_size); + } + if (migrate_ignore_shared()) { + qemu_put_be64(f, block->mr->addr); + } } } - rcu_read_unlock(); - ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); @@ -3425,55 +3415,57 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) goto out; } - rcu_read_lock(); - if (ram_list.version != rs->last_version) { - ram_state_reset(rs); - } - - /* Read version before ram_list.blocks */ - smp_rmb(); + WITH_RCU_READ_LOCK_GUARD() { + if (ram_list.version != rs->last_version) { + ram_state_reset(rs); + } - ram_control_before_iterate(f, RAM_CONTROL_ROUND); + /* Read version before ram_list.blocks */ + smp_rmb(); - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - i = 0; - while ((ret = qemu_file_rate_limit(f)) == 0 || - !QSIMPLEQ_EMPTY(&rs->src_page_requests)) { - int pages; + ram_control_before_iterate(f, RAM_CONTROL_ROUND); - if (qemu_file_get_error(f)) { - break; - } + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + i = 0; + while ((ret = qemu_file_rate_limit(f)) == 0 || + !QSIMPLEQ_EMPTY(&rs->src_page_requests)) { + int pages; - pages = ram_find_and_save_block(rs, false); - /* no more pages to sent */ - if (pages == 0) { - done = 1; - break; - } + if (qemu_file_get_error(f)) { + break; + } - if (pages < 0) { - qemu_file_set_error(f, pages); - break; - } + pages = ram_find_and_save_block(rs, false); + /* no more pages to sent */ + if (pages == 0) { + done = 1; + break; + } - rs->target_page_count += pages; - - /* we want to check in the 1st loop, just in case it was the 1st time - and we had to sync the dirty bitmap. - qemu_clock_get_ns() is a bit expensive, so we only check each some - iterations - */ - if ((i & 63) == 0) { - uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; - if (t1 > MAX_WAIT) { - trace_ram_save_iterate_big_wait(t1, i); + if (pages < 0) { + qemu_file_set_error(f, pages); break; } + + rs->target_page_count += pages; + + /* + * we want to check in the 1st loop, just in case it was the 1st + * time and we had to sync the dirty bitmap. + * qemu_clock_get_ns() is a bit expensive, so we only check each + * some iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / + 1000000; + if (t1 > MAX_WAIT) { + trace_ram_save_iterate_big_wait(t1, i); + break; + } + } + i++; } - i++; } - rcu_read_unlock(); /* * Must occur before EOS (or any QEMUFile operation) @@ -3511,35 +3503,33 @@ static int ram_save_complete(QEMUFile *f, void *opaque) RAMState *rs = *temp; int ret = 0; - rcu_read_lock(); - - if (!migration_in_postcopy()) { - migration_bitmap_sync_precopy(rs); - } + WITH_RCU_READ_LOCK_GUARD() { + if (!migration_in_postcopy()) { + migration_bitmap_sync_precopy(rs); + } - ram_control_before_iterate(f, RAM_CONTROL_FINISH); + ram_control_before_iterate(f, RAM_CONTROL_FINISH); - /* try transferring iterative blocks of memory */ + /* try transferring iterative blocks of memory */ - /* flush all remaining blocks regardless of rate limiting */ - while (true) { - int pages; + /* flush all remaining blocks regardless of rate limiting */ + while (true) { + int pages; - pages = ram_find_and_save_block(rs, !migration_in_colo_state()); - /* no more blocks to sent */ - if (pages == 0) { - break; - } - if (pages < 0) { - ret = pages; - break; + pages = ram_find_and_save_block(rs, !migration_in_colo_state()); + /* no more blocks to sent */ + if (pages == 0) { + break; + } + if (pages < 0) { + ret = pages; + break; + } } - } - flush_compressed_data(rs); - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - - rcu_read_unlock(); + flush_compressed_data(rs); + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } multifd_send_sync_main(rs); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); @@ -3562,9 +3552,9 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, if (!migration_in_postcopy() && remaining_size < max_size) { qemu_mutex_lock_iothread(); - rcu_read_lock(); - migration_bitmap_sync_precopy(rs); - rcu_read_unlock(); + WITH_RCU_READ_LOCK_GUARD() { + migration_bitmap_sync_precopy(rs); + } qemu_mutex_unlock_iothread(); remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } @@ -3908,7 +3898,13 @@ int colo_init_ram_cache(void) error_report("%s: Can't alloc memory for COLO cache of block %s," "size 0x" RAM_ADDR_FMT, __func__, block->idstr, block->used_length); - goto out_locked; + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + if (block->colo_cache) { + qemu_anon_ram_free(block->colo_cache, block->used_length); + block->colo_cache = NULL; + } + } + return -errno; } memcpy(block->colo_cache, block->host, block->used_length); } @@ -3934,18 +3930,6 @@ int colo_init_ram_cache(void) memory_global_dirty_log_start(); return 0; - -out_locked: - - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - if (block->colo_cache) { - qemu_anon_ram_free(block->colo_cache, block->used_length); - block->colo_cache = NULL; - } - } - - rcu_read_unlock(); - return -errno; } /* It is need to hold the global lock to call this helper */ @@ -3959,16 +3943,14 @@ void colo_release_ram_cache(void) block->bmap = NULL; } - rcu_read_lock(); - - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - if (block->colo_cache) { - qemu_anon_ram_free(block->colo_cache, block->used_length); - block->colo_cache = NULL; + WITH_RCU_READ_LOCK_GUARD() { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + if (block->colo_cache) { + qemu_anon_ram_free(block->colo_cache, block->used_length); + block->colo_cache = NULL; + } } } - - rcu_read_unlock(); qemu_mutex_destroy(&ram_state->bitmap_mutex); g_free(ram_state); ram_state = NULL; @@ -4048,7 +4030,7 @@ static int ram_load_postcopy(QEMUFile *f) bool matches_target_page_size = false; MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ - void *postcopy_host_page = postcopy_get_tmp_page(mis); + void *postcopy_host_page = mis->postcopy_tmp_page; void *last_host = NULL; bool all_zero = false; @@ -4206,31 +4188,30 @@ static void colo_flush_ram_cache(void) unsigned long offset = 0; memory_global_dirty_log_sync(); - rcu_read_lock(); - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - ramblock_sync_dirty_bitmap(ram_state, block); + WITH_RCU_READ_LOCK_GUARD() { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + ramblock_sync_dirty_bitmap(ram_state, block); + } } - rcu_read_unlock(); trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages); - rcu_read_lock(); - block = QLIST_FIRST_RCU(&ram_list.blocks); + WITH_RCU_READ_LOCK_GUARD() { + block = QLIST_FIRST_RCU(&ram_list.blocks); - while (block) { - offset = migration_bitmap_find_dirty(ram_state, block, offset); + while (block) { + offset = migration_bitmap_find_dirty(ram_state, block, offset); - if (offset << TARGET_PAGE_BITS >= block->used_length) { - offset = 0; - block = QLIST_NEXT_RCU(block, next); - } else { - migration_bitmap_clear_dirty(ram_state, block, offset); - dst_host = block->host + (offset << TARGET_PAGE_BITS); - src_host = block->colo_cache + (offset << TARGET_PAGE_BITS); - memcpy(dst_host, src_host, TARGET_PAGE_SIZE); + if (offset << TARGET_PAGE_BITS >= block->used_length) { + offset = 0; + block = QLIST_NEXT_RCU(block, next); + } else { + migration_bitmap_clear_dirty(ram_state, block, offset); + dst_host = block->host + (offset << TARGET_PAGE_BITS); + src_host = block->colo_cache + (offset << TARGET_PAGE_BITS); + memcpy(dst_host, src_host, TARGET_PAGE_SIZE); + } } } - - rcu_read_unlock(); trace_colo_flush_ram_cache_end(); } @@ -4429,16 +4410,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) * it will be necessary to reduce the granularity of this * critical section. */ - rcu_read_lock(); + WITH_RCU_READ_LOCK_GUARD() { + if (postcopy_running) { + ret = ram_load_postcopy(f); + } else { + ret = ram_load_precopy(f); + } - if (postcopy_running) { - ret = ram_load_postcopy(f); - } else { - ret = ram_load_precopy(f); + ret |= wait_for_decompress_done(); } - - ret |= wait_for_decompress_done(); - rcu_read_unlock(); trace_ram_load_complete(ret, seq_iter); if (!ret && migration_incoming_in_colo_state()) { diff --git a/migration/rdma.c b/migration/rdma.c index 4c74e88a37..e241dcb992 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -88,7 +88,6 @@ static uint32_t known_capabilities = RDMA_CAPABILITY_PIN_ALL; " to abort!"); \ rdma->error_reported = 1; \ } \ - rcu_read_unlock(); \ return rdma->error_state; \ } \ } while (0) @@ -2678,11 +2677,10 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, size_t i; size_t len = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmaout); if (!rdma) { - rcu_read_unlock(); return -EIO; } @@ -2695,7 +2693,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, ret = qemu_rdma_write_flush(f, rdma); if (ret < 0) { rdma->error_state = ret; - rcu_read_unlock(); return ret; } @@ -2715,7 +2712,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, if (ret < 0) { rdma->error_state = ret; - rcu_read_unlock(); return ret; } @@ -2724,7 +2720,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, } } - rcu_read_unlock(); return done; } @@ -2764,11 +2759,10 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, ssize_t i; size_t done = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmain); if (!rdma) { - rcu_read_unlock(); return -EIO; } @@ -2805,7 +2799,6 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, if (ret < 0) { rdma->error_state = ret; - rcu_read_unlock(); return ret; } @@ -2819,14 +2812,12 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, /* Still didn't get enough, so lets just return */ if (want) { if (done == 0) { - rcu_read_unlock(); return QIO_CHANNEL_ERR_BLOCK; } else { break; } } } - rcu_read_unlock(); return done; } @@ -2882,7 +2873,7 @@ qio_channel_rdma_source_prepare(GSource *source, GIOCondition cond = 0; *timeout = -1; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); if (rsource->condition == G_IO_IN) { rdma = atomic_rcu_read(&rsource->rioc->rdmain); } else { @@ -2891,7 +2882,6 @@ qio_channel_rdma_source_prepare(GSource *source, if (!rdma) { error_report("RDMAContext is NULL when prepare Gsource"); - rcu_read_unlock(); return FALSE; } @@ -2900,7 +2890,6 @@ qio_channel_rdma_source_prepare(GSource *source, } cond |= G_IO_OUT; - rcu_read_unlock(); return cond & rsource->condition; } @@ -2911,7 +2900,7 @@ qio_channel_rdma_source_check(GSource *source) RDMAContext *rdma; GIOCondition cond = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); if (rsource->condition == G_IO_IN) { rdma = atomic_rcu_read(&rsource->rioc->rdmain); } else { @@ -2920,7 +2909,6 @@ qio_channel_rdma_source_check(GSource *source) if (!rdma) { error_report("RDMAContext is NULL when check Gsource"); - rcu_read_unlock(); return FALSE; } @@ -2929,7 +2917,6 @@ qio_channel_rdma_source_check(GSource *source) } cond |= G_IO_OUT; - rcu_read_unlock(); return cond & rsource->condition; } @@ -2943,7 +2930,7 @@ qio_channel_rdma_source_dispatch(GSource *source, RDMAContext *rdma; GIOCondition cond = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); if (rsource->condition == G_IO_IN) { rdma = atomic_rcu_read(&rsource->rioc->rdmain); } else { @@ -2952,7 +2939,6 @@ qio_channel_rdma_source_dispatch(GSource *source, if (!rdma) { error_report("RDMAContext is NULL when dispatch Gsource"); - rcu_read_unlock(); return FALSE; } @@ -2961,7 +2947,6 @@ qio_channel_rdma_source_dispatch(GSource *source, } cond |= G_IO_OUT; - rcu_read_unlock(); return (*func)(QIO_CHANNEL(rsource->rioc), (cond & rsource->condition), user_data); @@ -3073,7 +3058,7 @@ qio_channel_rdma_shutdown(QIOChannel *ioc, QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); RDMAContext *rdmain, *rdmaout; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdmain = atomic_rcu_read(&rioc->rdmain); rdmaout = atomic_rcu_read(&rioc->rdmain); @@ -3100,7 +3085,6 @@ qio_channel_rdma_shutdown(QIOChannel *ioc, break; } - rcu_read_unlock(); return 0; } @@ -3146,18 +3130,16 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, RDMAContext *rdma; int ret; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmaout); if (!rdma) { - rcu_read_unlock(); return -EIO; } CHECK_ERROR_STATE(); if (migration_in_postcopy()) { - rcu_read_unlock(); return RAM_SAVE_CONTROL_NOT_SUPP; } @@ -3242,11 +3224,9 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, } } - rcu_read_unlock(); return RAM_SAVE_CONTROL_DELAYED; err: rdma->error_state = ret; - rcu_read_unlock(); return ret; } @@ -3470,11 +3450,10 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque) int count = 0; int i = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmain); if (!rdma) { - rcu_read_unlock(); return -EIO; } @@ -3717,7 +3696,6 @@ out: if (ret < 0) { rdma->error_state = ret; } - rcu_read_unlock(); return ret; } @@ -3735,11 +3713,10 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name) int curr; int found = -1; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmain); if (!rdma) { - rcu_read_unlock(); return -EIO; } @@ -3753,7 +3730,6 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name) if (found == -1) { error_report("RAMBlock '%s' not found on destination", name); - rcu_read_unlock(); return -ENOENT; } @@ -3761,7 +3737,6 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name) trace_rdma_block_notification_handle(name, rdma->next_src_index); rdma->next_src_index++; - rcu_read_unlock(); return 0; } @@ -3786,17 +3761,15 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque); RDMAContext *rdma; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmaout); if (!rdma) { - rcu_read_unlock(); return -EIO; } CHECK_ERROR_STATE(); if (migration_in_postcopy()) { - rcu_read_unlock(); return 0; } @@ -3804,7 +3777,6 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, qemu_put_be64(f, RAM_SAVE_FLAG_HOOK); qemu_fflush(f); - rcu_read_unlock(); return 0; } @@ -3821,17 +3793,15 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, RDMAControlHeader head = { .len = 0, .repeat = 1 }; int ret = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); rdma = atomic_rcu_read(&rioc->rdmaout); if (!rdma) { - rcu_read_unlock(); return -EIO; } CHECK_ERROR_STATE(); if (migration_in_postcopy()) { - rcu_read_unlock(); return 0; } @@ -3863,7 +3833,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, qemu_rdma_reg_whole_ram_blocks : NULL); if (ret < 0) { ERROR(errp, "receiving remote info!"); - rcu_read_unlock(); return ret; } @@ -3887,7 +3856,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, "not identical on both the source and destination.", local->nb_blocks, nb_dest_blocks); rdma->error_state = -EINVAL; - rcu_read_unlock(); return -EINVAL; } @@ -3904,7 +3872,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, local->block[i].length, rdma->dest_blocks[i].length); rdma->error_state = -EINVAL; - rcu_read_unlock(); return -EINVAL; } local->block[i].remote_host_addr = @@ -3922,11 +3889,9 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, goto err; } - rcu_read_unlock(); return 0; err: rdma->error_state = ret; - rcu_read_unlock(); return ret; } diff --git a/migration/savevm.c b/migration/savevm.c index bb9462a54d..8d95e261f6 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1215,6 +1215,8 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) save_section_footer(f, se); if (ret < 0) { + error_report("failed to save SaveStateEntry with id(name): %d(%s)", + se->section_id, se->idstr); qemu_file_set_error(f, ret); } if (ret <= 0) { @@ -1835,6 +1837,8 @@ static void *postcopy_ram_listen_thread(void *opaque) rcu_unregister_thread(); mis->have_listen_thread = false; + postcopy_state_set(POSTCOPY_INCOMING_END); + return NULL; } @@ -1865,7 +1869,7 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) * shouldn't be doing anything yet so don't actually expect requests */ if (migrate_postcopy_ram()) { - if (postcopy_ram_enable_notify(mis)) { + if (postcopy_ram_incoming_setup(mis)) { postcopy_ram_incoming_cleanup(mis); return -1; } @@ -1876,11 +1880,6 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) return -1; } - if (mis->have_listen_thread) { - error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread"); - return -1; - } - mis->have_listen_thread = true; /* Start up the listening thread and wait for it to signal ready */ qemu_sem_init(&mis->listen_thread_sem, 0); @@ -1934,7 +1933,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) /* After all discards we can start running and asking for pages */ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) { - PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); + PostcopyState ps = postcopy_state_get(); trace_loadvm_postcopy_handle_run(); if (ps != POSTCOPY_INCOMING_LISTENING) { @@ -1942,6 +1941,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) return -1; } + postcopy_state_set(POSTCOPY_INCOMING_RUNNING); mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis); qemu_bh_schedule(mis->bh); diff --git a/migration/trace-events b/migration/trace-events index 858d415d56..6dee7b5389 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -71,6 +71,11 @@ get_qtailq_end(const char *name, const char *reason, int val) "%s %s/%d" put_qtailq(const char *name, int version_id) "%s v%d" put_qtailq_end(const char *name, const char *reason) "%s %s" +get_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d" +get_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d" +put_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d" +put_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d" + # qemu-file.c qemu_file_fclose(void) "" diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index bee658a1b2..7236cf92bc 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -691,3 +691,155 @@ const VMStateInfo vmstate_info_qtailq = { .get = get_qtailq, .put = put_qtailq, }; + +struct put_gtree_data { + QEMUFile *f; + const VMStateDescription *key_vmsd; + const VMStateDescription *val_vmsd; + QJSON *vmdesc; + int ret; +}; + +static gboolean put_gtree_elem(gpointer key, gpointer value, gpointer data) +{ + struct put_gtree_data *capsule = (struct put_gtree_data *)data; + QEMUFile *f = capsule->f; + int ret; + + qemu_put_byte(f, true); + + /* put the key */ + if (!capsule->key_vmsd) { + qemu_put_be64(f, (uint64_t)(uintptr_t)(key)); /* direct key */ + } else { + ret = vmstate_save_state(f, capsule->key_vmsd, key, capsule->vmdesc); + if (ret) { + capsule->ret = ret; + return true; + } + } + + /* put the data */ + ret = vmstate_save_state(f, capsule->val_vmsd, value, capsule->vmdesc); + if (ret) { + capsule->ret = ret; + return true; + } + return false; +} + +static int put_gtree(QEMUFile *f, void *pv, size_t unused_size, + const VMStateField *field, QJSON *vmdesc) +{ + bool direct_key = (!field->start); + const VMStateDescription *key_vmsd = direct_key ? NULL : &field->vmsd[1]; + const VMStateDescription *val_vmsd = &field->vmsd[0]; + const char *key_vmsd_name = direct_key ? "direct" : key_vmsd->name; + struct put_gtree_data capsule = { + .f = f, + .key_vmsd = key_vmsd, + .val_vmsd = val_vmsd, + .vmdesc = vmdesc, + .ret = 0}; + GTree **pval = pv; + GTree *tree = *pval; + uint32_t nnodes = g_tree_nnodes(tree); + int ret; + + trace_put_gtree(field->name, key_vmsd_name, val_vmsd->name, nnodes); + qemu_put_be32(f, nnodes); + g_tree_foreach(tree, put_gtree_elem, (gpointer)&capsule); + qemu_put_byte(f, false); + ret = capsule.ret; + if (ret) { + error_report("%s : failed to save gtree (%d)", field->name, ret); + } + trace_put_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret); + return ret; +} + +static int get_gtree(QEMUFile *f, void *pv, size_t unused_size, + const VMStateField *field) +{ + bool direct_key = (!field->start); + const VMStateDescription *key_vmsd = direct_key ? NULL : &field->vmsd[1]; + const VMStateDescription *val_vmsd = &field->vmsd[0]; + const char *key_vmsd_name = direct_key ? "direct" : key_vmsd->name; + int version_id = field->version_id; + size_t key_size = field->start; + size_t val_size = field->size; + int nnodes, count = 0; + GTree **pval = pv; + GTree *tree = *pval; + void *key, *val; + int ret = 0; + + /* in case of direct key, the key vmsd can be {}, ie. check fields */ + if (!direct_key && version_id > key_vmsd->version_id) { + error_report("%s %s", key_vmsd->name, "too new"); + return -EINVAL; + } + if (!direct_key && version_id < key_vmsd->minimum_version_id) { + error_report("%s %s", key_vmsd->name, "too old"); + return -EINVAL; + } + if (version_id > val_vmsd->version_id) { + error_report("%s %s", val_vmsd->name, "too new"); + return -EINVAL; + } + if (version_id < val_vmsd->minimum_version_id) { + error_report("%s %s", val_vmsd->name, "too old"); + return -EINVAL; + } + + nnodes = qemu_get_be32(f); + trace_get_gtree(field->name, key_vmsd_name, val_vmsd->name, nnodes); + + while (qemu_get_byte(f)) { + if ((++count) > nnodes) { + ret = -EINVAL; + break; + } + if (direct_key) { + key = (void *)(uintptr_t)qemu_get_be64(f); + } else { + key = g_malloc0(key_size); + ret = vmstate_load_state(f, key_vmsd, key, version_id); + if (ret) { + error_report("%s : failed to load %s (%d)", + field->name, key_vmsd->name, ret); + goto key_error; + } + } + val = g_malloc0(val_size); + ret = vmstate_load_state(f, val_vmsd, val, version_id); + if (ret) { + error_report("%s : failed to load %s (%d)", + field->name, val_vmsd->name, ret); + goto val_error; + } + g_tree_insert(tree, key, val); + } + if (count != nnodes) { + error_report("%s inconsistent stream when loading the gtree", + field->name); + return -EINVAL; + } + trace_get_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret); + return ret; +val_error: + g_free(val); +key_error: + if (!direct_key) { + g_free(key); + } + trace_get_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret); + return ret; +} + + +const VMStateInfo vmstate_info_gtree = { + .name = "gtree", + .get = get_gtree, + .put = put_gtree, +}; |