author     Anthony Liguori <aliguori@us.ibm.com>  2011-02-01 15:23:24 -0600
committer  Anthony Liguori <aliguori@us.ibm.com>  2011-02-01 15:32:18 -0600
commit     b3a98367eec7b2d87acca54ef5e4de3b0e0a7ed5 (patch)
tree       22b6121e6f81c09c7dd780ed211afef7b9ebb92d
parent     9363ee31ab53fc0fd39fbe5936d9c00a2f4e54a4 (diff)
parent     c5999bfcfdf66390c98115044cb6fd174fbcf36d (diff)
download   qemu-b3a98367eec7b2d87acca54ef5e4de3b0e0a7ed5.zip
Merge remote branch 'qemu-kvm/uq/master' into staging
aliguori: fix build with !defined(KVM_CAP_ASYNC_PF)
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
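[Editor's note] The fixup mentioned in the merge message lands in the target-i386/kvm.c hunks below: every use of the async page fault MSR is now compiled only when the host kernel headers actually provide the capability. A minimal sketch of the guard pattern, taken directly from the diff:

    #if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF)
    /* Only track the MSR when both the paravirt config option and the
     * host header's capability constant exist at build time. */
    static bool has_msr_async_pf_en;
    #endif

The same dual condition guards the MSR_KVM_ASYNC_PF_EN save/restore code in kvm_put_msrs() and kvm_get_msrs(), which is what un-breaks builds against headers lacking KVM_CAP_ASYNC_PF.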
-rwxr-xr-x  configure             |  36
-rw-r--r--  cpu-all.h             |   5
-rw-r--r--  cpus.c                |   2
-rw-r--r--  hmp-commands.hx       |   6
-rw-r--r--  kvm-all.c             | 247
-rw-r--r--  kvm-stub.c            |   2
-rw-r--r--  kvm.h                 |  14
-rw-r--r--  monitor.c             |   7
-rw-r--r--  target-i386/cpu.h     |   9
-rw-r--r--  target-i386/cpuid.c   |   5
-rw-r--r--  target-i386/helper.c  |  97
-rw-r--r--  target-i386/kvm.c     | 751
-rw-r--r--  target-i386/kvm_x86.h |   5
-rw-r--r--  target-ppc/kvm.c      |  10
-rw-r--r--  target-s390x/kvm.c    |   6
-rw-r--r--  vl.c                  |   2
16 files changed, 716 insertions(+), 488 deletions(-)
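[Editor's note] The centerpiece of the merge is the table-driven capability check introduced in kvm.h, kvm-all.c, and the per-target files below. A short illustration of how the new macros expand (the definitions are from the diff; the expansion comment is ours):

    typedef struct KVMCapabilityInfo {
        const char *name;
        int value;
    } KVMCapabilityInfo;

    #define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP }
    #define KVM_CAP_LAST_INFO { NULL, 0 }

    /* KVM_CAP_INFO(USER_MEMORY) expands to
     *     { "KVM_CAP_USER_MEMORY", KVM_CAP_USER_MEMORY },
     * so kvm_check_extension_list() can both probe each capability and
     * print its name via missing_cap->name when the host lacks it. */

Each target exports a kvm_arch_required_capabilities[] array terminated by KVM_CAP_LAST_INFO; generic code checks the common kvm_required_capabilites[] table first and then the architecture-specific one.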
@@ -1663,15 +1663,31 @@ if test "$kvm" != "no" ; then #if !defined(KVM_API_VERSION) || KVM_API_VERSION < 12 || KVM_API_VERSION > 12 #error Invalid KVM version #endif -#if !defined(KVM_CAP_USER_MEMORY) -#error Missing KVM capability KVM_CAP_USER_MEMORY -#endif -#if !defined(KVM_CAP_SET_TSS_ADDR) -#error Missing KVM capability KVM_CAP_SET_TSS_ADDR -#endif -#if !defined(KVM_CAP_DESTROY_MEMORY_REGION_WORKS) -#error Missing KVM capability KVM_CAP_DESTROY_MEMORY_REGION_WORKS +EOF + must_have_caps="KVM_CAP_USER_MEMORY \ + KVM_CAP_DESTROY_MEMORY_REGION_WORKS \ + KVM_CAP_COALESCED_MMIO \ + KVM_CAP_SYNC_MMU \ + " + if test \( "$cpu" = "i386" -o "$cpu" = "x86_64" \) ; then + must_have_caps="$caps \ + KVM_CAP_SET_TSS_ADDR \ + KVM_CAP_EXT_CPUID \ + KVM_CAP_CLOCKSOURCE \ + KVM_CAP_NOP_IO_DELAY \ + KVM_CAP_PV_MMU \ + KVM_CAP_MP_STATE \ + KVM_CAP_USER_NMI \ + " + fi + for c in $must_have_caps ; do + cat >> $TMPC <<EOF +#if !defined($c) +#error Missing KVM capability $c #endif +EOF + done + cat >> $TMPC <<EOF int main(void) { return 0; } EOF if test "$kerneldir" != "" ; then @@ -1706,8 +1722,8 @@ EOF | awk -F "error: " '{if (NR>1) printf(", "); printf("%s",$2);}'` if test "$kvmerr" != "" ; then echo -e "${kvmerr}\n\ - NOTE: To enable KVM support, update your kernel to 2.6.29+ or install \ - recent kvm-kmod from http://sourceforge.net/projects/kvm." +NOTE: To enable KVM support, update your kernel to 2.6.29+ or install \ +recent kvm-kmod from http://sourceforge.net/projects/kvm." fi fi feature_not_found "kvm" @@ -765,6 +765,8 @@ int page_check_range(target_ulong start, target_ulong len, int flags); CPUState *cpu_copy(CPUState *env); CPUState *qemu_get_cpu(int cpu); +#define CPU_DUMP_CODE 0x00010000 + void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, int flags); void cpu_dump_statistics(CPUState *env, FILE *f, fprintf_function cpu_fprintf, @@ -964,6 +966,7 @@ int cpu_memory_rw_debug(CPUState *env, target_ulong addr, uint8_t *buf, int len, int is_write); void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc); + uint64_t mcg_status, uint64_t addr, uint64_t misc, + int broadcast); #endif /* CPU_ALL_H */ @@ -735,9 +735,7 @@ static sigset_t block_io_signals(void) void qemu_mutex_lock_iothread(void) { if (kvm_enabled()) { - qemu_mutex_lock(&qemu_fair_mutex); qemu_mutex_lock(&qemu_global_mutex); - qemu_mutex_unlock(&qemu_fair_mutex); } else { qemu_mutex_lock(&qemu_fair_mutex); if (qemu_mutex_trylock(&qemu_global_mutex)) { diff --git a/hmp-commands.hx b/hmp-commands.hx index a923817a1d..38e1eb7ebb 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1152,9 +1152,9 @@ ETEXI { .name = "mce", - .args_type = "cpu_index:i,bank:i,status:l,mcg_status:l,addr:l,misc:l", - .params = "cpu bank status mcgstatus addr misc", - .help = "inject a MCE on the given CPU", + .args_type = "broadcast:-b,cpu_index:i,bank:i,status:l,mcg_status:l,addr:l,misc:l", + .params = "[-b] cpu bank status mcgstatus addr misc", + .help = "inject a MCE on the given CPU [and broadcast to other CPUs with -b option]", .mhandler.cmd = do_inject_mce, }, @@ -63,9 +63,7 @@ struct KVMState int fd; int vmfd; int coalesced_mmio; -#ifdef KVM_CAP_COALESCED_MMIO struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; -#endif int broken_set_mem_region; int migration_log; int vcpu_events; @@ -82,16 +80,24 @@ struct KVMState static KVMState *kvm_state; +static const KVMCapabilityInfo kvm_required_capabilites[] = { + KVM_CAP_INFO(USER_MEMORY), + 
KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS), + KVM_CAP_LAST_INFO +}; + static KVMSlot *kvm_alloc_slot(KVMState *s) { int i; for (i = 0; i < ARRAY_SIZE(s->slots); i++) { /* KVM private memory slots */ - if (i >= 8 && i < 12) + if (i >= 8 && i < 12) { continue; - if (s->slots[i].memory_size == 0) + } + if (s->slots[i].memory_size == 0) { return &s->slots[i]; + } } fprintf(stderr, "%s: no free slot available\n", __func__); @@ -225,11 +231,10 @@ int kvm_init_vcpu(CPUState *env) goto err; } -#ifdef KVM_CAP_COALESCED_MMIO - if (s->coalesced_mmio && !s->coalesced_mmio_ring) - s->coalesced_mmio_ring = (void *) env->kvm_run + - s->coalesced_mmio * PAGE_SIZE; -#endif + if (s->coalesced_mmio && !s->coalesced_mmio_ring) { + s->coalesced_mmio_ring = + (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE; + } ret = kvm_arch_init_vcpu(env); if (ret == 0) { @@ -275,16 +280,14 @@ static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size) { - return kvm_dirty_pages_log_change(phys_addr, size, - KVM_MEM_LOG_DIRTY_PAGES, - KVM_MEM_LOG_DIRTY_PAGES); + return kvm_dirty_pages_log_change(phys_addr, size, KVM_MEM_LOG_DIRTY_PAGES, + KVM_MEM_LOG_DIRTY_PAGES); } int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size) { - return kvm_dirty_pages_log_change(phys_addr, size, - 0, - KVM_MEM_LOG_DIRTY_PAGES); + return kvm_dirty_pages_log_change(phys_addr, size, 0, + KVM_MEM_LOG_DIRTY_PAGES); } static int kvm_set_migration_log(int enable) @@ -356,7 +359,7 @@ static int kvm_get_dirty_pages_log_range(unsigned long start_addr, * @end_addr: end of logged region. */ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, - target_phys_addr_t end_addr) + target_phys_addr_t end_addr) { KVMState *s = kvm_state; unsigned long size, allocated_size = 0; @@ -400,7 +403,6 @@ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) { int ret = -ENOSYS; -#ifdef KVM_CAP_COALESCED_MMIO KVMState *s = kvm_state; if (s->coalesced_mmio) { @@ -411,7 +413,6 @@ int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone); } -#endif return ret; } @@ -419,7 +420,6 @@ int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) { int ret = -ENOSYS; -#ifdef KVM_CAP_COALESCED_MMIO KVMState *s = kvm_state; if (s->coalesced_mmio) { @@ -430,7 +430,6 @@ int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone); } -#endif return ret; } @@ -484,9 +483,20 @@ static int kvm_check_many_ioeventfds(void) #endif } -static void kvm_set_phys_mem(target_phys_addr_t start_addr, - ram_addr_t size, - ram_addr_t phys_offset) +static const KVMCapabilityInfo * +kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) +{ + while (list->name) { + if (!kvm_check_extension(s, list->value)) { + return list; + } + list++; + } + return NULL; +} + +static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, + ram_addr_t phys_offset) { KVMState *s = kvm_state; ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK; @@ -593,13 +603,13 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, } /* in case the KVM bug workaround already "consumed" the new slot */ - if (!size) + if (!size) { return; - + } /* KVM does not need to know about this 
memory */ - if (flags >= IO_MEM_UNASSIGNED) + if (flags >= IO_MEM_UNASSIGNED) { return; - + } mem = kvm_alloc_slot(s); mem->memory_size = size; mem->start_addr = start_addr; @@ -615,38 +625,38 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, } static void kvm_client_set_memory(struct CPUPhysMemoryClient *client, - target_phys_addr_t start_addr, - ram_addr_t size, - ram_addr_t phys_offset) + target_phys_addr_t start_addr, + ram_addr_t size, ram_addr_t phys_offset) { - kvm_set_phys_mem(start_addr, size, phys_offset); + kvm_set_phys_mem(start_addr, size, phys_offset); } static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client, - target_phys_addr_t start_addr, - target_phys_addr_t end_addr) + target_phys_addr_t start_addr, + target_phys_addr_t end_addr) { - return kvm_physical_sync_dirty_bitmap(start_addr, end_addr); + return kvm_physical_sync_dirty_bitmap(start_addr, end_addr); } static int kvm_client_migration_log(struct CPUPhysMemoryClient *client, - int enable) + int enable) { - return kvm_set_migration_log(enable); + return kvm_set_migration_log(enable); } static CPUPhysMemoryClient kvm_cpu_phys_memory_client = { - .set_memory = kvm_client_set_memory, - .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap, - .migration_log = kvm_client_migration_log, + .set_memory = kvm_client_set_memory, + .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap, + .migration_log = kvm_client_migration_log, }; -int kvm_init(int smp_cpus) +int kvm_init(void) { static const char upgrade_note[] = "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n" "(see http://sourceforge.net/projects/kvm).\n"; KVMState *s; + const KVMCapabilityInfo *missing_cap; int ret; int i; @@ -655,9 +665,9 @@ int kvm_init(int smp_cpus) #ifdef KVM_CAP_SET_GUEST_DEBUG QTAILQ_INIT(&s->kvm_sw_breakpoints); #endif - for (i = 0; i < ARRAY_SIZE(s->slots); i++) + for (i = 0; i < ARRAY_SIZE(s->slots); i++) { s->slots[i].slot = i; - + } s->vmfd = -1; s->fd = qemu_open("/dev/kvm", O_RDWR); if (s->fd == -1) { @@ -668,8 +678,9 @@ int kvm_init(int smp_cpus) ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0); if (ret < KVM_API_VERSION) { - if (ret > 0) + if (ret > 0) { ret = -EINVAL; + } fprintf(stderr, "kvm version too old\n"); goto err; } @@ -689,39 +700,23 @@ int kvm_init(int smp_cpus) goto err; } - /* initially, KVM allocated its own memory and we had to jump through - * hooks to make phys_ram_base point to this. Modern versions of KVM - * just use a user allocated buffer so we can use regular pages - * unmodified. Make sure we have a sufficiently modern version of KVM. - */ - if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) { - ret = -EINVAL; - fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s", - upgrade_note); - goto err; + missing_cap = kvm_check_extension_list(s, kvm_required_capabilites); + if (!missing_cap) { + missing_cap = + kvm_check_extension_list(s, kvm_arch_required_capabilities); } - - /* There was a nasty bug in < kvm-80 that prevents memory slots from being - * destroyed properly. Since we rely on this capability, refuse to work - * with any kernel without this capability. 
*/ - if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) { + if (missing_cap) { ret = -EINVAL; - - fprintf(stderr, - "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s", - upgrade_note); + fprintf(stderr, "kvm does not support %s\n%s", + missing_cap->name, upgrade_note); goto err; } - s->coalesced_mmio = 0; -#ifdef KVM_CAP_COALESCED_MMIO s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO); - s->coalesced_mmio_ring = NULL; -#endif s->broken_set_mem_region = 1; #ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS - ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS); + ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS); if (ret > 0) { s->broken_set_mem_region = 0; } @@ -753,9 +748,10 @@ int kvm_init(int smp_cpus) s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS); #endif - ret = kvm_arch_init(s, smp_cpus); - if (ret < 0) + ret = kvm_arch_init(s); + if (ret < 0) { goto err; + } kvm_state = s; cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client); @@ -766,10 +762,12 @@ int kvm_init(int smp_cpus) err: if (s) { - if (s->vmfd != -1) + if (s->vmfd != -1) { close(s->vmfd); - if (s->fd != -1) + } + if (s->fd != -1) { close(s->fd); + } } qemu_free(s); @@ -816,36 +814,36 @@ static int kvm_handle_io(uint16_t port, void *data, int direction, int size, } #ifdef KVM_CAP_INTERNAL_ERROR_DATA -static void kvm_handle_internal_error(CPUState *env, struct kvm_run *run) +static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run) { - + fprintf(stderr, "KVM internal error."); if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) { int i; - fprintf(stderr, "KVM internal error. Suberror: %d\n", - run->internal.suberror); - + fprintf(stderr, " Suberror: %d\n", run->internal.suberror); for (i = 0; i < run->internal.ndata; ++i) { fprintf(stderr, "extra data[%d]: %"PRIx64"\n", i, (uint64_t)run->internal.data[i]); } + } else { + fprintf(stderr, "\n"); } - cpu_dump_state(env, stderr, fprintf, 0); if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) { fprintf(stderr, "emulation failure\n"); - if (!kvm_arch_stop_on_emulation_error(env)) - return; + if (!kvm_arch_stop_on_emulation_error(env)) { + cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE); + return 0; + } } /* FIXME: Should trigger a qmp message to let management know * something went wrong. 
*/ - vm_stop(0); + return -1; } #endif void kvm_flush_coalesced_mmio_buffer(void) { -#ifdef KVM_CAP_COALESCED_MMIO KVMState *s = kvm_state; if (s->coalesced_mmio_ring) { struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring; @@ -859,7 +857,6 @@ void kvm_flush_coalesced_mmio_buffer(void) ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX; } } -#endif } static void do_kvm_cpu_synchronize_state(void *_env) @@ -874,8 +871,9 @@ static void do_kvm_cpu_synchronize_state(void *_env) void kvm_cpu_synchronize_state(CPUState *env) { - if (!env->kvm_vcpu_dirty) + if (!env->kvm_vcpu_dirty) { run_on_cpu(env, do_kvm_cpu_synchronize_state, env); + } } void kvm_cpu_synchronize_post_reset(CPUState *env) @@ -924,6 +922,8 @@ int kvm_cpu_exec(CPUState *env) cpu_single_env = env; kvm_arch_post_run(env, run); + kvm_flush_coalesced_mmio_buffer(); + if (ret == -EINTR || ret == -EAGAIN) { cpu_exit(env); DPRINTF("io window exit\n"); @@ -936,8 +936,6 @@ int kvm_cpu_exec(CPUState *env) abort(); } - kvm_flush_coalesced_mmio_buffer(); - ret = 0; /* exit loop */ switch (run->exit_reason) { case KVM_EXIT_IO: @@ -965,17 +963,13 @@ int kvm_cpu_exec(CPUState *env) ret = 1; break; case KVM_EXIT_UNKNOWN: - DPRINTF("kvm_exit_unknown\n"); - break; - case KVM_EXIT_FAIL_ENTRY: - DPRINTF("kvm_exit_fail_entry\n"); - break; - case KVM_EXIT_EXCEPTION: - DPRINTF("kvm_exit_exception\n"); + fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n", + (uint64_t)run->hw.hardware_exit_reason); + ret = -1; break; #ifdef KVM_CAP_INTERNAL_ERROR_DATA case KVM_EXIT_INTERNAL_ERROR: - kvm_handle_internal_error(env, run); + ret = kvm_handle_internal_error(env, run); break; #endif case KVM_EXIT_DEBUG: @@ -996,6 +990,11 @@ int kvm_cpu_exec(CPUState *env) } } while (ret > 0); + if (ret < 0) { + cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE); + vm_stop(0); + env->exit_request = 1; + } if (env->exit_request) { env->exit_request = 0; env->exception_index = EXCP_INTERRUPT; @@ -1015,9 +1014,9 @@ int kvm_ioctl(KVMState *s, int type, ...) va_end(ap); ret = ioctl(s->fd, type, arg); - if (ret == -1) + if (ret == -1) { ret = -errno; - + } return ret; } @@ -1032,9 +1031,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) va_end(ap); ret = ioctl(s->vmfd, type, arg); - if (ret == -1) + if (ret == -1) { ret = -errno; - + } return ret; } @@ -1049,21 +1048,15 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...) 
va_end(ap); ret = ioctl(env->kvm_fd, type, arg); - if (ret == -1) + if (ret == -1) { ret = -errno; - + } return ret; } int kvm_has_sync_mmu(void) { -#ifdef KVM_CAP_SYNC_MMU - KVMState *s = kvm_state; - - return kvm_check_extension(s, KVM_CAP_SYNC_MMU); -#else - return 0; -#endif + return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU); } int kvm_has_vcpu_events(void) @@ -1120,8 +1113,9 @@ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp; QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) { - if (bp->pc == pc) + if (bp->pc == pc) { return bp; + } } return NULL; } @@ -1176,8 +1170,9 @@ int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, } bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint)); - if (!bp) + if (!bp) { return -ENOMEM; + } bp->pc = addr; bp->use_count = 1; @@ -1191,14 +1186,16 @@ int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, bp, entry); } else { err = kvm_arch_insert_hw_breakpoint(addr, len, type); - if (err) + if (err) { return err; + } } for (env = first_cpu; env != NULL; env = env->next_cpu) { err = kvm_update_guest_debug(env, 0); - if (err) + if (err) { return err; + } } return 0; } @@ -1212,8 +1209,9 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, if (type == GDB_BREAKPOINT_SW) { bp = kvm_find_sw_breakpoint(current_env, addr); - if (!bp) + if (!bp) { return -ENOENT; + } if (bp->use_count > 1) { bp->use_count--; @@ -1221,21 +1219,24 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, } err = kvm_arch_remove_sw_breakpoint(current_env, bp); - if (err) + if (err) { return err; + } QTAILQ_REMOVE(¤t_env->kvm_state->kvm_sw_breakpoints, bp, entry); qemu_free(bp); } else { err = kvm_arch_remove_hw_breakpoint(addr, len, type); - if (err) + if (err) { return err; + } } for (env = first_cpu; env != NULL; env = env->next_cpu) { err = kvm_update_guest_debug(env, 0); - if (err) + if (err) { return err; + } } return 0; } @@ -1250,15 +1251,17 @@ void kvm_remove_all_breakpoints(CPUState *current_env) if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) { /* Try harder to find a CPU that currently sees the breakpoint. 
*/ for (env = first_cpu; env != NULL; env = env->next_cpu) { - if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) + if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) { break; + } } } } kvm_arch_remove_all_hw_breakpoints(); - for (env = first_cpu; env != NULL; env = env->next_cpu) + for (env = first_cpu; env != NULL; env = env->next_cpu) { kvm_update_guest_debug(env, 0); + } } #else /* !KVM_CAP_SET_GUEST_DEBUG */ @@ -1290,8 +1293,9 @@ int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset) struct kvm_signal_mask *sigmask; int r; - if (!sigset) + if (!sigset) { return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL); + } sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset)); @@ -1346,13 +1350,16 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) .fd = fd, }; int r; - if (!kvm_enabled()) + if (!kvm_enabled()) { return -ENOSYS; - if (!assign) + } + if (!assign) { kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN; + } r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick); - if (r < 0) + if (r < 0) { return r; + } return 0; #else return -ENOSYS; diff --git a/kvm-stub.c b/kvm-stub.c index 33d4476fa3..88682f288b 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -58,7 +58,7 @@ int kvm_check_extension(KVMState *s, unsigned int extension) return 0; } -int kvm_init(int smp_cpus) +int kvm_init(void) { return -ENOSYS; } @@ -32,9 +32,17 @@ extern int kvm_allowed; struct kvm_run; +typedef struct KVMCapabilityInfo { + const char *name; + int value; +} KVMCapabilityInfo; + +#define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } +#define KVM_CAP_LAST_INFO { NULL, 0 } + /* external API */ -int kvm_init(int smp_cpus); +int kvm_init(void); int kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); @@ -86,6 +94,8 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...); /* Arch specific hooks */ +extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; + int kvm_arch_post_run(CPUState *env, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); @@ -105,7 +115,7 @@ int kvm_arch_get_registers(CPUState *env); int kvm_arch_put_registers(CPUState *env, int level); -int kvm_arch_init(KVMState *s, int smp_cpus); +int kvm_arch_init(KVMState *s); int kvm_arch_init_vcpu(CPUState *env); @@ -2709,12 +2709,15 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict) uint64_t mcg_status = qdict_get_int(qdict, "mcg_status"); uint64_t addr = qdict_get_int(qdict, "addr"); uint64_t misc = qdict_get_int(qdict, "misc"); + int broadcast = qdict_get_try_bool(qdict, "broadcast", 0); - for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) + for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) { if (cenv->cpu_index == cpu_index && cenv->mcg_cap) { - cpu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc); + cpu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, + broadcast); break; } + } } #endif diff --git a/target-i386/cpu.h b/target-i386/cpu.h index f0c07cde3b..af701a4412 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -699,6 +699,10 @@ typedef struct CPUX86State { uint32_t smbase; int old_exception; /* exception in flight */ + /* KVM states, automatically cleared on reset */ + uint8_t nmi_injected; + uint8_t nmi_pending; + CPU_COMMON /* processor features (e.g. 
for CPUID insn) */ @@ -726,8 +730,6 @@ typedef struct CPUX86State { int32_t exception_injected; int32_t interrupt_injected; uint8_t soft_interrupt; - uint8_t nmi_injected; - uint8_t nmi_pending; uint8_t has_error_code; uint32_t sipi_vector; uint32_t cpuid_kvm_features; @@ -760,6 +762,7 @@ int cpu_x86_exec(CPUX86State *s); void cpu_x86_close(CPUX86State *s); void x86_cpu_list (FILE *f, fprintf_function cpu_fprintf, const char *optarg); void x86_cpudef_setup(void); +int cpu_x86_support_mca_broadcast(CPUState *env); int cpu_get_pic_interrupt(CPUX86State *s); /* MSDOS compatibility mode FPU exception support */ @@ -873,6 +876,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *ecx, uint32_t *edx); int cpu_x86_register (CPUX86State *env, const char *cpu_model); void cpu_clear_apic_feature(CPUX86State *env); +void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); /* helper.c */ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index 165045ec42..5382a283f5 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -103,9 +103,8 @@ typedef struct model_features_t { int check_cpuid = 0; int enforce_cpuid = 0; -static void host_cpuid(uint32_t function, uint32_t count, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { #if defined(CONFIG_KVM) uint32_t vec[4]; diff --git a/target-i386/helper.c b/target-i386/helper.c index 25a3e36138..1217452e71 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -110,6 +110,32 @@ void cpu_x86_close(CPUX86State *env) qemu_free(env); } +static void cpu_x86_version(CPUState *env, int *family, int *model) +{ + int cpuver = env->cpuid_version; + + if (family == NULL || model == NULL) { + return; + } + + *family = (cpuver >> 8) & 0x0f; + *model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0x0f); +} + +/* Broadcast MCA signal for processor version 06H_EH and above */ +int cpu_x86_support_mca_broadcast(CPUState *env) +{ + int family = 0; + int model = 0; + + cpu_x86_version(env, &family, &model); + if ((family == 6 && model >= 14) || family > 6) { + return 1; + } + + return 0; +} + /***********************************************************/ /* x86 debug */ @@ -223,6 +249,9 @@ done: cpu_fprintf(f, "\n"); } +#define DUMP_CODE_BYTES_TOTAL 50 +#define DUMP_CODE_BYTES_BACKWARD 20 + void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -408,6 +437,24 @@ void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, cpu_fprintf(f, " "); } } + if (flags & CPU_DUMP_CODE) { + target_ulong base = env->segs[R_CS].base + env->eip; + target_ulong offs = MIN(env->eip, DUMP_CODE_BYTES_BACKWARD); + uint8_t code; + char codestr[3]; + + cpu_fprintf(f, "Code="); + for (i = 0; i < DUMP_CODE_BYTES_TOTAL; i++) { + if (cpu_memory_rw_debug(env, base - offs + i, &code, 1, 0) == 0) { + snprintf(codestr, sizeof(codestr), "%02x", code); + } else { + snprintf(codestr, sizeof(codestr), "??"); + } + cpu_fprintf(f, "%s%s%s%s", i > 0 ? " " : "", + i == offs ? "<" : "", codestr, i == offs ? ">" : ""); + } + cpu_fprintf(f, "\n"); + } } /***********************************************************/ @@ -1021,21 +1068,12 @@ static void breakpoint_handler(CPUState *env) /* This should come from sysemu.h - if we could include it here... 
*/ void qemu_system_reset_request(void); -void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, +static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc) { uint64_t mcg_cap = cenv->mcg_cap; - unsigned bank_num = mcg_cap & 0xff; uint64_t *banks = cenv->mce_banks; - if (bank >= bank_num || !(status & MCI_STATUS_VAL)) - return; - - if (kvm_enabled()) { - kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0); - return; - } - /* * if MSR_MCG_CTL is not all 1s, the uncorrected error * reporting is disabled @@ -1076,6 +1114,45 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, } else banks[1] |= MCI_STATUS_OVER; } + +void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, + uint64_t mcg_status, uint64_t addr, uint64_t misc, + int broadcast) +{ + unsigned bank_num = cenv->mcg_cap & 0xff; + CPUState *env; + int flag = 0; + + if (bank >= bank_num || !(status & MCI_STATUS_VAL)) { + return; + } + + if (broadcast) { + if (!cpu_x86_support_mca_broadcast(cenv)) { + fprintf(stderr, "Current CPU does not support broadcast\n"); + return; + } + } + + if (kvm_enabled()) { + if (broadcast) { + flag |= MCE_BROADCAST; + } + + kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, flag); + } else { + qemu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc); + if (broadcast) { + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (cenv == env) { + continue; + } + + qemu_inject_x86_mce(env, 1, 0xa000000000000000, 0, 0, 0); + } + } + } +} #endif /* !CONFIG_USER_ONLY */ static void mce_init(CPUX86State *cenv) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7dfc357e42..05010bbc38 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -54,9 +54,19 @@ #define BUS_MCEERR_AO 5 #endif -static int lm_capable_kernel; +const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(SET_TSS_ADDR), + KVM_CAP_INFO(EXT_CPUID), + KVM_CAP_INFO(MP_STATE), + KVM_CAP_LAST_INFO +}; -#ifdef KVM_CAP_EXT_CPUID +static bool has_msr_star; +static bool has_msr_hsave_pa; +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) +static bool has_msr_async_pf_en; +#endif +static int lm_capable_kernel; static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) { @@ -91,10 +101,6 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, uint32_t ret = 0; uint32_t cpuid_1_edx; - if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) { - return -1U; - } - max = 1; while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) { max *= 2; @@ -138,46 +144,33 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, return ret; } -#else - -uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, - uint32_t index, int reg) -{ - return -1U; -} - -#endif - #ifdef CONFIG_KVM_PARA struct kvm_para_features { - int cap; - int feature; + int cap; + int feature; } para_features[] = { -#ifdef KVM_CAP_CLOCKSOURCE - { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE }, -#endif -#ifdef KVM_CAP_NOP_IO_DELAY - { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, -#endif -#ifdef KVM_CAP_PV_MMU - { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, -#endif + { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE }, + { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, + { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, #ifdef KVM_CAP_ASYNC_PF - { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, + { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, #endif - { -1, -1 } + { -1, -1 } }; static 
int get_para_features(CPUState *env) { - int i, features = 0; + int i, features = 0; - for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) { - if (kvm_check_extension(env->kvm_state, para_features[i].cap)) - features |= (1 << para_features[i].feature); + for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) { + if (kvm_check_extension(env->kvm_state, para_features[i].cap)) { + features |= (1 << para_features[i].feature); } - - return features; + } +#ifdef KVM_CAP_ASYNC_PF + has_msr_async_pf_en = features & (1 << KVM_FEATURE_ASYNC_PF); +#endif + return features; } #endif @@ -187,7 +180,7 @@ static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, { int r; - r = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_MCE); + r = kvm_check_extension(s, KVM_CAP_MCE); if (r > 0) { *max_banks = r; return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); @@ -219,7 +212,7 @@ static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n) } /* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */ -static int kvm_mce_in_exception(CPUState *env) +static int kvm_mce_in_progress(CPUState *env) { struct kvm_msr_entry msr_mcg_status = { .index = MSR_MCG_STATUS, @@ -228,7 +221,8 @@ static int kvm_mce_in_exception(CPUState *env) r = kvm_get_msr(env, &msr_mcg_status, 1); if (r == -1 || r == 0) { - return -1; + fprintf(stderr, "Failed to get MCE status\n"); + return 0; } return !!(msr_mcg_status.data & MCG_STATUS_MCIP); } @@ -248,10 +242,7 @@ static void kvm_do_inject_x86_mce(void *_data) /* If there is an MCE exception being processed, ignore this SRAO MCE */ if ((data->env->mcg_cap & MCG_SER_P) && !(data->mce->status & MCI_STATUS_AR)) { - r = kvm_mce_in_exception(data->env); - if (r == -1) { - fprintf(stderr, "Failed to get MCE status\n"); - } else if (r) { + if (kvm_mce_in_progress(data->env)) { return; } } @@ -264,11 +255,30 @@ static void kvm_do_inject_x86_mce(void *_data) } } } + +static void kvm_inject_x86_mce_on(CPUState *env, struct kvm_x86_mce *mce, + int flag) +{ + struct kvm_x86_mce_data data = { + .env = env, + .mce = mce, + .abort_on_error = (flag & ABORT_ON_ERROR), + }; + + if (!env->mcg_cap) { + fprintf(stderr, "MCE support is not enabled!\n"); + return; + } + + run_on_cpu(env, kvm_do_inject_x86_mce, &data); +} + +static void kvm_mce_broadcast_rest(CPUState *env); #endif void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, - int abort_on_error) + int flag) { #ifdef KVM_CAP_MCE struct kvm_x86_mce mce = { @@ -278,20 +288,16 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, .addr = addr, .misc = misc, }; - struct kvm_x86_mce_data data = { - .env = cenv, - .mce = &mce, - }; - if (!cenv->mcg_cap) { - fprintf(stderr, "MCE support is not enabled!\n"); - return; + if (flag & MCE_BROADCAST) { + kvm_mce_broadcast_rest(cenv); } - run_on_cpu(cenv, kvm_do_inject_x86_mce, &data); + kvm_inject_x86_mce_on(cenv, &mce, flag); #else - if (abort_on_error) + if (flag & ABORT_ON_ERROR) { abort(); + } #endif } @@ -304,12 +310,10 @@ int kvm_arch_init_vcpu(CPUState *env) uint32_t limit, i, j, cpuid_i; uint32_t unused; struct kvm_cpuid_entry2 *c; -#ifdef KVM_CPUID_SIGNATURE +#ifdef CONFIG_KVM_PARA uint32_t signature[3]; #endif - env->mp_state = KVM_MP_STATE_RUNNABLE; - env->cpuid_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX); i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR; @@ -376,13 +380,15 @@ int kvm_arch_init_vcpu(CPUState *env) c->index = j; cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, 
&c->edx); - if (i == 4 && c->eax == 0) + if (i == 4 && c->eax == 0) { break; - if (i == 0xb && !(c->ecx & 0xff00)) + } + if (i == 0xb && !(c->ecx & 0xff00)) { break; - if (i == 0xd && c->eax == 0) + } + if (i == 0xd && c->eax == 0) { break; - + } c = &cpuid_data.entries[cpuid_i++]; } break; @@ -412,17 +418,18 @@ int kvm_arch_init_vcpu(CPUState *env) uint64_t mcg_cap; int banks; - if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) + if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) { perror("kvm_get_mce_cap_supported FAILED"); - else { + } else { if (banks > MCE_BANKS_DEF) banks = MCE_BANKS_DEF; mcg_cap &= MCE_CAP_DEF; mcg_cap |= banks; - if (kvm_setup_mce(env, &mcg_cap)) + if (kvm_setup_mce(env, &mcg_cap)) { perror("kvm_setup_mce FAILED"); - else + } else { env->mcg_cap = mcg_cap; + } } } #endif @@ -434,8 +441,7 @@ void kvm_arch_reset_vcpu(CPUState *env) { env->exception_injected = -1; env->interrupt_injected = -1; - env->nmi_injected = 0; - env->nmi_pending = 0; + env->xcr0 = 1; if (kvm_irqchip_in_kernel()) { env->mp_state = cpu_is_bsp(env) ? KVM_MP_STATE_RUNNABLE : KVM_MP_STATE_UNINITIALIZED; @@ -444,13 +450,10 @@ void kvm_arch_reset_vcpu(CPUState *env) } } -int has_msr_star; -int has_msr_hsave_pa; - -static void kvm_supported_msrs(CPUState *env) +static int kvm_get_supported_msrs(KVMState *s) { static int kvm_supported_msrs; - int ret; + int ret = 0; /* first time */ if (kvm_supported_msrs == 0) { @@ -461,9 +464,9 @@ static void kvm_supported_msrs(CPUState *env) /* Obtain MSR list from KVM. These are the MSRs that we must * save/restore */ msr_list.nmsrs = 0; - ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list); + ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list); if (ret < 0 && ret != -E2BIG) { - return; + return ret; } /* Old kernel modules had a bug and could write beyond the provided memory. Allocate at least a safe amount of 1K. */ @@ -472,17 +475,17 @@ static void kvm_supported_msrs(CPUState *env) sizeof(msr_list.indices[0]))); kvm_msr_list->nmsrs = msr_list.nmsrs; - ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); + ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); if (ret >= 0) { int i; for (i = 0; i < kvm_msr_list->nmsrs; i++) { if (kvm_msr_list->indices[i] == MSR_STAR) { - has_msr_star = 1; + has_msr_star = true; continue; } if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) { - has_msr_hsave_pa = 1; + has_msr_hsave_pa = true; continue; } } @@ -491,80 +494,61 @@ static void kvm_supported_msrs(CPUState *env) free(kvm_msr_list); } - return; -} - -static int kvm_has_msr_hsave_pa(CPUState *env) -{ - kvm_supported_msrs(env); - return has_msr_hsave_pa; -} - -static int kvm_has_msr_star(CPUState *env) -{ - kvm_supported_msrs(env); - return has_msr_star; + return ret; } -static int kvm_init_identity_map_page(KVMState *s) +int kvm_arch_init(KVMState *s) { -#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR + uint64_t identity_base = 0xfffbc000; int ret; - uint64_t addr = 0xfffbc000; - - if (!kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) { - return 0; - } + struct utsname utsname; - ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &addr); + ret = kvm_get_supported_msrs(s); if (ret < 0) { - fprintf(stderr, "kvm_set_identity_map_addr: %s\n", strerror(ret)); return ret; } -#endif - return 0; -} - -int kvm_arch_init(KVMState *s, int smp_cpus) -{ - int ret; - - struct utsname utsname; uname(&utsname); lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; - /* create vm86 tss. 
KVM uses vm86 mode to emulate 16-bit code - * directly. In order to use vm86 mode, a TSS is needed. Since this - * must be part of guest physical memory, we need to allocate it. Older - * versions of KVM just assumed that it would be at the end of physical - * memory but that doesn't work with more than 4GB of memory. We simply - * refuse to work with those older versions of KVM. */ - ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); - if (ret <= 0) { - fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n"); - return ret; - } - - /* this address is 3 pages before the bios, and the bios should present - * as unavaible memory. FIXME, need to ensure the e820 map deals with - * this? - */ /* - * Tell fw_cfg to notify the BIOS to reserve the range. + * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. + * In order to use vm86 mode, an EPT identity map and a TSS are needed. + * Since these must be part of guest physical memory, we need to allocate + * them, both by setting their start addresses in the kernel and by + * creating a corresponding e820 entry. We need 4 pages before the BIOS. + * + * Older KVM versions may not support setting the identity map base. In + * that case we need to stick with the default, i.e. a 256K maximum BIOS + * size. */ - if (e820_add_entry(0xfffbc000, 0x4000, E820_RESERVED) < 0) { - perror("e820_add_entry() table is full"); - exit(1); +#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR + if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) { + /* Allows up to 16M BIOSes. */ + identity_base = 0xfeffc000; + + ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); + if (ret < 0) { + return ret; + } } - ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000); +#endif + /* Set TSS base one page after EPT identity map. */ + ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000); if (ret < 0) { return ret; } - return kvm_init_identity_map_page(s); + /* Tell fw_cfg to notify the BIOS to reserve the range. 
*/ + ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED); + if (ret < 0) { + fprintf(stderr, "e820_add_entry() table is full\n"); + return ret; + } + + return 0; } - + static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) { lhs->selector = rhs->selector; @@ -589,7 +573,7 @@ static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) lhs->limit = rhs->limit; lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; lhs->present = (flags & DESC_P_MASK) != 0; - lhs->dpl = rhs->selector & 3; + lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3; lhs->db = (flags >> DESC_B_SHIFT) & 1; lhs->s = (flags & DESC_S_MASK) != 0; lhs->l = (flags >> DESC_L_SHIFT) & 1; @@ -603,23 +587,23 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) lhs->selector = rhs->selector; lhs->base = rhs->base; lhs->limit = rhs->limit; - lhs->flags = - (rhs->type << DESC_TYPE_SHIFT) - | (rhs->present * DESC_P_MASK) - | (rhs->dpl << DESC_DPL_SHIFT) - | (rhs->db << DESC_B_SHIFT) - | (rhs->s * DESC_S_MASK) - | (rhs->l << DESC_L_SHIFT) - | (rhs->g * DESC_G_MASK) - | (rhs->avl * DESC_AVL_MASK); + lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | + (rhs->present * DESC_P_MASK) | + (rhs->dpl << DESC_DPL_SHIFT) | + (rhs->db << DESC_B_SHIFT) | + (rhs->s * DESC_S_MASK) | + (rhs->l << DESC_L_SHIFT) | + (rhs->g * DESC_G_MASK) | + (rhs->avl * DESC_AVL_MASK); } static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) { - if (set) + if (set) { *kvm_reg = *qemu_reg; - else + } else { *qemu_reg = *kvm_reg; + } } static int kvm_getput_regs(CPUState *env, int set) @@ -629,8 +613,9 @@ static int kvm_getput_regs(CPUState *env, int set) if (!set) { ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, ®s); - if (ret < 0) + if (ret < 0) { return ret; + } } kvm_getput_reg(®s.rax, &env->regs[R_EAX], set); @@ -655,8 +640,9 @@ static int kvm_getput_regs(CPUState *env, int set) kvm_getput_reg(®s.rflags, &env->eflags, set); kvm_getput_reg(®s.rip, &env->eip, set); - if (set) + if (set) { ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, ®s); + } return ret; } @@ -670,8 +656,9 @@ static int kvm_put_fpu(CPUState *env) fpu.fsw = env->fpus & ~(7 << 11); fpu.fsw |= (env->fpstt & 7) << 11; fpu.fcw = env->fpuc; - for (i = 0; i < 8; ++i) - fpu.ftwx |= (!env->fptags[i]) << i; + for (i = 0; i < 8; ++i) { + fpu.ftwx |= (!env->fptags[i]) << i; + } memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs); fpu.mxcsr = env->mxcsr; @@ -696,8 +683,9 @@ static int kvm_put_xsave(CPUState *env) struct kvm_xsave* xsave; uint16_t cwd, swd, twd, fop; - if (!kvm_has_xsave()) + if (!kvm_has_xsave()) { return kvm_put_fpu(env); + } xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); memset(xsave, 0, sizeof(struct kvm_xsave)); @@ -705,8 +693,9 @@ static int kvm_put_xsave(CPUState *env) swd = env->fpus & ~(7 << 11); swd |= (env->fpstt & 7) << 11; cwd = env->fpuc; - for (i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { twd |= (!env->fptags[i]) << i; + } xsave->region[0] = (uint32_t)(swd << 16) + cwd; xsave->region[1] = (uint32_t)(fop << 16) + twd; memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs, @@ -730,8 +719,9 @@ static int kvm_put_xcrs(CPUState *env) #ifdef KVM_CAP_XCRS struct kvm_xcrs xcrs; - if (!kvm_has_xcrs()) + if (!kvm_has_xcrs()) { return 0; + } xcrs.nr_xcrs = 1; xcrs.flags = 0; @@ -754,26 +744,19 @@ static int kvm_put_sregs(CPUState *env) } if ((env->eflags & VM_MASK)) { - set_v8086_seg(&sregs.cs, &env->segs[R_CS]); - set_v8086_seg(&sregs.ds, &env->segs[R_DS]); - set_v8086_seg(&sregs.es, 
&env->segs[R_ES]); - set_v8086_seg(&sregs.fs, &env->segs[R_FS]); - set_v8086_seg(&sregs.gs, &env->segs[R_GS]); - set_v8086_seg(&sregs.ss, &env->segs[R_SS]); + set_v8086_seg(&sregs.cs, &env->segs[R_CS]); + set_v8086_seg(&sregs.ds, &env->segs[R_DS]); + set_v8086_seg(&sregs.es, &env->segs[R_ES]); + set_v8086_seg(&sregs.fs, &env->segs[R_FS]); + set_v8086_seg(&sregs.gs, &env->segs[R_GS]); + set_v8086_seg(&sregs.ss, &env->segs[R_SS]); } else { - set_seg(&sregs.cs, &env->segs[R_CS]); - set_seg(&sregs.ds, &env->segs[R_DS]); - set_seg(&sregs.es, &env->segs[R_ES]); - set_seg(&sregs.fs, &env->segs[R_FS]); - set_seg(&sregs.gs, &env->segs[R_GS]); - set_seg(&sregs.ss, &env->segs[R_SS]); - - if (env->cr[0] & CR0_PE_MASK) { - /* force ss cpl to cs cpl */ - sregs.ss.selector = (sregs.ss.selector & ~3) | - (sregs.cs.selector & 3); - sregs.ss.dpl = sregs.ss.selector & 3; - } + set_seg(&sregs.cs, &env->segs[R_CS]); + set_seg(&sregs.ds, &env->segs[R_DS]); + set_seg(&sregs.es, &env->segs[R_ES]); + set_seg(&sregs.fs, &env->segs[R_FS]); + set_seg(&sregs.gs, &env->segs[R_GS]); + set_seg(&sregs.ss, &env->segs[R_SS]); } set_seg(&sregs.tr, &env->tr); @@ -816,10 +799,12 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); - if (kvm_has_msr_star(env)) - kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); - if (kvm_has_msr_hsave_pa(env)) + if (has_msr_star) { + kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); + } + if (has_msr_hsave_pa) { kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -838,23 +823,35 @@ static int kvm_put_msrs(CPUState *env, int level) if (smp_cpus == 1 || env->tsc != 0) { kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); } + } + /* + * The following paravirtual MSRs have side effects on the guest or are + * too heavy for normal writeback. Limit them to reset or full state + * updates. 
+ */ + if (level >= KVM_PUT_RESET_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); -#ifdef KVM_CAP_ASYNC_PF - kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) + if (has_msr_async_pf_en) { + kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN, + env->async_pf_en_msr); + } #endif } #ifdef KVM_CAP_MCE if (env->mcg_cap) { int i; - if (level == KVM_PUT_RESET_STATE) + + if (level == KVM_PUT_RESET_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); - else if (level == KVM_PUT_FULL_STATE) { + } else if (level == KVM_PUT_FULL_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl); - for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) + for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]); + } } } #endif @@ -872,14 +869,16 @@ static int kvm_get_fpu(CPUState *env) int i, ret; ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu); - if (ret < 0) + if (ret < 0) { return ret; + } env->fpstt = (fpu.fsw >> 11) & 7; env->fpus = fpu.fsw; env->fpuc = fpu.fcw; - for (i = 0; i < 8; ++i) - env->fptags[i] = !((fpu.ftwx >> i) & 1); + for (i = 0; i < 8; ++i) { + env->fptags[i] = !((fpu.ftwx >> i) & 1); + } memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs); env->mxcsr = fpu.mxcsr; @@ -894,8 +893,9 @@ static int kvm_get_xsave(CPUState *env) int ret, i; uint16_t cwd, swd, twd, fop; - if (!kvm_has_xsave()) + if (!kvm_has_xsave()) { return kvm_get_fpu(env); + } xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); ret = kvm_vcpu_ioctl(env, KVM_GET_XSAVE, xsave); @@ -911,8 +911,9 @@ static int kvm_get_xsave(CPUState *env) env->fpstt = (swd >> 11) & 7; env->fpus = swd; env->fpuc = cwd; - for (i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { env->fptags[i] = !((twd >> i) & 1); + } env->mxcsr = xsave->region[XSAVE_MXCSR]; memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE], sizeof env->fpregs); @@ -934,19 +935,22 @@ static int kvm_get_xcrs(CPUState *env) int i, ret; struct kvm_xcrs xcrs; - if (!kvm_has_xcrs()) + if (!kvm_has_xcrs()) { return 0; + } ret = kvm_vcpu_ioctl(env, KVM_GET_XCRS, &xcrs); - if (ret < 0) + if (ret < 0) { return ret; + } - for (i = 0; i < xcrs.nr_xcrs; i++) + for (i = 0; i < xcrs.nr_xcrs; i++) { /* Only support xcr0 now */ if (xcrs.xcrs[0].xcr == 0) { env->xcr0 = xcrs.xcrs[0].value; break; } + } return 0; #else return 0; @@ -960,8 +964,9 @@ static int kvm_get_sregs(CPUState *env) int bit, i, ret; ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs); - if (ret < 0) + if (ret < 0) { return ret; + } /* There can only be one pending IRQ set in the bitmap at a time, so try to find it and save its number instead (-1 for none). 
*/ @@ -999,21 +1004,19 @@ static int kvm_get_sregs(CPUState *env) env->efer = sregs.efer; //cpu_set_apic_tpr(env->apic_state, sregs.cr8); -#define HFLAG_COPY_MASK ~( \ - HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ - HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ - HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ - HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) - - +#define HFLAG_COPY_MASK \ + ~( HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ + HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ + HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ + HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & - (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); + (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << - (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); + (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); if (env->efer & MSR_EFER_LMA) { hflags |= HF_LMA_MASK; @@ -1023,19 +1026,16 @@ static int kvm_get_sregs(CPUState *env) hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; } else { hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >> - (DESC_B_SHIFT - HF_CS32_SHIFT); + (DESC_B_SHIFT - HF_CS32_SHIFT); hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >> - (DESC_B_SHIFT - HF_SS32_SHIFT); - if (!(env->cr[0] & CR0_PE_MASK) || - (env->eflags & VM_MASK) || - !(hflags & HF_CS32_MASK)) { - hflags |= HF_ADDSEG_MASK; - } else { - hflags |= ((env->segs[R_DS].base | - env->segs[R_ES].base | - env->segs[R_SS].base) != 0) << - HF_ADDSEG_SHIFT; - } + (DESC_B_SHIFT - HF_SS32_SHIFT); + if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK) || + !(hflags & HF_CS32_MASK)) { + hflags |= HF_ADDSEG_MASK; + } else { + hflags |= ((env->segs[R_DS].base | env->segs[R_ES].base | + env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT; + } } env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; @@ -1055,10 +1055,12 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_IA32_SYSENTER_CS; msrs[n++].index = MSR_IA32_SYSENTER_ESP; msrs[n++].index = MSR_IA32_SYSENTER_EIP; - if (kvm_has_msr_star(env)) - msrs[n++].index = MSR_STAR; - if (kvm_has_msr_hsave_pa(env)) + if (has_msr_star) { + msrs[n++].index = MSR_STAR; + } + if (has_msr_hsave_pa) { msrs[n++].index = MSR_VM_HSAVE_PA; + } msrs[n++].index = MSR_IA32_TSC; #ifdef TARGET_X86_64 if (lm_capable_kernel) { @@ -1070,23 +1072,27 @@ static int kvm_get_msrs(CPUState *env) #endif msrs[n++].index = MSR_KVM_SYSTEM_TIME; msrs[n++].index = MSR_KVM_WALL_CLOCK; -#ifdef KVM_CAP_ASYNC_PF - msrs[n++].index = MSR_KVM_ASYNC_PF_EN; +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) + if (has_msr_async_pf_en) { + msrs[n++].index = MSR_KVM_ASYNC_PF_EN; + } #endif #ifdef KVM_CAP_MCE if (env->mcg_cap) { msrs[n++].index = MSR_MCG_STATUS; msrs[n++].index = MSR_MCG_CTL; - for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) + for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { msrs[n++].index = MSR_MC0_CTL + i; + } } #endif msr_data.info.nmsrs = n; ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data); - if (ret < 0) + if (ret < 0) { return ret; + } for (i = 0; i < ret; i++) { switch (msrs[i].index) { @@ -1144,7 +1150,7 @@ static int kvm_get_msrs(CPUState *env) } #endif break; -#ifdef KVM_CAP_ASYNC_PF +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = 
msrs[i].data; break; @@ -1172,6 +1178,9 @@ static int kvm_get_mp_state(CPUState *env) return ret; } env->mp_state = mp_state.mp_state; + if (kvm_irqchip_in_kernel()) { + env->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED); + } return 0; } @@ -1314,7 +1323,7 @@ static int kvm_get_debugregs(CPUState *env) ret = kvm_vcpu_ioctl(env, KVM_GET_DEBUGREGS, &dbgregs); if (ret < 0) { - return ret; + return ret; } for (i = 0; i < 4; i++) { env->dr[i] = dbgregs.db[i]; @@ -1333,44 +1342,44 @@ int kvm_arch_put_registers(CPUState *env, int level) assert(cpu_is_stopped(env) || qemu_cpu_self(env)); ret = kvm_getput_regs(env, 1); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_xsave(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_xcrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_sregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_msrs(env, level); - if (ret < 0) + if (ret < 0) { return ret; - + } if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_mp_state(env); - if (ret < 0) + if (ret < 0) { return ret; + } } - ret = kvm_put_vcpu_events(env, level); - if (ret < 0) + if (ret < 0) { return ret; - + } + ret = kvm_put_debugregs(env); + if (ret < 0) { + return ret; + } /* must be last */ ret = kvm_guest_debug_workarounds(env); - if (ret < 0) - return ret; - - ret = kvm_put_debugregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } return 0; } @@ -1381,42 +1390,49 @@ int kvm_arch_get_registers(CPUState *env) assert(cpu_is_stopped(env) || qemu_cpu_self(env)); ret = kvm_getput_regs(env, 0); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_xsave(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_xcrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_sregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_msrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_mp_state(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_vcpu_events(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_debugregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } return 0; } int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) { + /* Inject NMI */ + if (env->interrupt_request & CPU_INTERRUPT_NMI) { + env->interrupt_request &= ~CPU_INTERRUPT_NMI; + DPRINTF("injected NMI\n"); + kvm_vcpu_ioctl(env, KVM_NMI); + } + /* Try to inject an interrupt if the guest can accept it */ if (run->ready_for_interrupt_injection && (env->interrupt_request & CPU_INTERRUPT_HARD) && @@ -1438,10 +1454,11 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) * interrupt, request an interrupt window exit. This will * cause a return to userspace as soon as the guest is ready to * receive interrupts. 
*/ - if ((env->interrupt_request & CPU_INTERRUPT_HARD)) + if ((env->interrupt_request & CPU_INTERRUPT_HARD)) { run->request_interrupt_window = 1; - else + } else { run->request_interrupt_window = 0; + } DPRINTF("setting tpr\n"); run->cr8 = cpu_get_apic_tpr(env->apic_state); @@ -1451,11 +1468,11 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) int kvm_arch_post_run(CPUState *env, struct kvm_run *run) { - if (run->if_flag) + if (run->if_flag) { env->eflags |= IF_MASK; - else + } else { env->eflags &= ~IF_MASK; - + } cpu_set_apic_tpr(env->apic_state, run->cr8); cpu_set_apic_base(env->apic_state, run->apic_base); @@ -1491,8 +1508,19 @@ static int kvm_handle_halt(CPUState *env) return 1; } +static bool host_supports_vmx(void) +{ + uint32_t ecx, unused; + + host_cpuid(1, 0, &unused, &unused, &ecx, &unused); + return ecx & CPUID_EXT_VMX; +} + +#define VMX_INVALID_GUEST_STATE 0x80000021 + int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) { + uint64_t code; int ret = 0; switch (run->exit_reason) { @@ -1500,6 +1528,35 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) DPRINTF("handle_hlt\n"); ret = kvm_handle_halt(env); break; + case KVM_EXIT_SET_TPR: + ret = 1; + break; + case KVM_EXIT_FAIL_ENTRY: + code = run->fail_entry.hardware_entry_failure_reason; + fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n", + code); + if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) { + fprintf(stderr, + "\nIf you're runnning a guest on an Intel machine without " + "unrestricted mode\n" + "support, the failure can be most likely due to the guest " + "entering an invalid\n" + "state for Intel VT. For example, the guest maybe running " + "in big real mode\n" + "which is not supported on less recent Intel processors." 
+ "\n\n"); + } + ret = -1; + break; + case KVM_EXIT_EXCEPTION: + fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n", + run->ex.exception, run->ex.error_code); + ret = -1; + break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; + break; } return ret; @@ -1511,8 +1568,9 @@ int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) static const uint8_t int3 = 0xcc; if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) || - cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) + cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) { return -EINVAL; + } return 0; } @@ -1521,8 +1579,9 @@ int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) uint8_t int3; if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc || - cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) + cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) { return -EINVAL; + } return 0; } @@ -1538,10 +1597,12 @@ static int find_hw_breakpoint(target_ulong addr, int len, int type) { int n; - for (n = 0; n < nb_hw_breakpoint; n++) + for (n = 0; n < nb_hw_breakpoint; n++) { if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type && - (hw_breakpoint[n].len == len || len == -1)) + (hw_breakpoint[n].len == len || len == -1)) { return n; + } + } return -1; } @@ -1560,8 +1621,9 @@ int kvm_arch_insert_hw_breakpoint(target_ulong addr, case 2: case 4: case 8: - if (addr & (len - 1)) + if (addr & (len - 1)) { return -EINVAL; + } break; default: return -EINVAL; @@ -1571,12 +1633,12 @@ int kvm_arch_insert_hw_breakpoint(target_ulong addr, return -ENOSYS; } - if (nb_hw_breakpoint == 4) + if (nb_hw_breakpoint == 4) { return -ENOBUFS; - - if (find_hw_breakpoint(addr, len, type) >= 0) + } + if (find_hw_breakpoint(addr, len, type) >= 0) { return -EEXIST; - + } hw_breakpoint[nb_hw_breakpoint].addr = addr; hw_breakpoint[nb_hw_breakpoint].len = len; hw_breakpoint[nb_hw_breakpoint].type = type; @@ -1591,9 +1653,9 @@ int kvm_arch_remove_hw_breakpoint(target_ulong addr, int n; n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 
1 : len, type); - if (n < 0) + if (n < 0) { return -ENOENT; - + } nb_hw_breakpoint--; hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint]; @@ -1614,11 +1676,12 @@ int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info) if (arch_info->exception == 1) { if (arch_info->dr6 & (1 << 14)) { - if (cpu_single_env->singlestep_enabled) + if (cpu_single_env->singlestep_enabled) { handle = 1; + } } else { - for (n = 0; n < 4; n++) - if (arch_info->dr6 & (1 << n)) + for (n = 0; n < 4; n++) { + if (arch_info->dr6 & (1 << n)) { switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) { case 0x0: handle = 1; @@ -1636,10 +1699,12 @@ int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info) hw_watchpoint.flags = BP_MEM_ACCESS; break; } + } + } } - } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc)) + } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc)) { handle = 1; - + } if (!handle) { cpu_synchronize_state(cpu_single_env); assert(cpu_single_env->exception_injected == -1); @@ -1663,9 +1728,9 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) }; int n; - if (kvm_sw_breakpoints_active(env)) + if (kvm_sw_breakpoints_active(env)) { dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; - + } if (nb_hw_breakpoint > 0) { dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; dbg->arch.debugreg[7] = 0x0600; @@ -1673,18 +1738,16 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) dbg->arch.debugreg[n] = hw_breakpoint[n].addr; dbg->arch.debugreg[7] |= (2 << (n * 2)) | (type_code[hw_breakpoint[n].type] << (16 + n*4)) | - (len_code[hw_breakpoint[n].len] << (18 + n*4)); + ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4)); } } - /* Legal xcr0 for loading */ - env->xcr0 = 1; } #endif /* KVM_CAP_SET_GUEST_DEBUG */ bool kvm_arch_stop_on_emulation_error(CPUState *env) { - return !(env->cr[0] & CR0_PE_MASK) || - ((env->segs[R_CS].selector & 3) != 3); + return !(env->cr[0] & CR0_PE_MASK) || + ((env->segs[R_CS].selector & 3) != 3); } static void hardware_memory_error(void) @@ -1696,64 +1759,96 @@ static void hardware_memory_error(void) #ifdef KVM_CAP_MCE static void kvm_mce_broadcast_rest(CPUState *env) { + struct kvm_x86_mce mce = { + .bank = 1, + .status = MCI_STATUS_VAL | MCI_STATUS_UC, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, + .addr = 0, + .misc = 0, + }; CPUState *cenv; - int family, model, cpuver = env->cpuid_version; - - family = (cpuver >> 8) & 0xf; - model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0xf); /* Broadcast MCA signal for processor version 06H_EH and above */ - if ((family == 6 && model >= 14) || family > 6) { + if (cpu_x86_support_mca_broadcast(env)) { for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) { if (cenv == env) { continue; } - kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1); + kvm_inject_x86_mce_on(cenv, &mce, ABORT_ON_ERROR); } } } + +static void kvm_mce_inj_srar_dataload(CPUState *env, target_phys_addr_t paddr) +{ + struct kvm_x86_mce mce = { + .bank = 9, + .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | MCI_STATUS_AR | 0x134, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV, + .addr = paddr, + .misc = (MCM_ADDR_PHYS << 6) | 0xc, + }; + int r; + + r = kvm_set_mce(env, &mce); + if (r < 0) { + fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); + abort(); + } + kvm_mce_broadcast_rest(env); +} + +static void kvm_mce_inj_srao_memscrub(CPUState *env, 
@@ -1696,64 +1759,96 @@ static void hardware_memory_error(void)
 
 #ifdef KVM_CAP_MCE
 static void kvm_mce_broadcast_rest(CPUState *env)
 {
+    struct kvm_x86_mce mce = {
+        .bank = 1,
+        .status = MCI_STATUS_VAL | MCI_STATUS_UC,
+        .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV,
+        .addr = 0,
+        .misc = 0,
+    };
     CPUState *cenv;
-    int family, model, cpuver = env->cpuid_version;
-
-    family = (cpuver >> 8) & 0xf;
-    model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0xf);
 
     /* Broadcast MCA signal for processor version 06H_EH and above */
-    if ((family == 6 && model >= 14) || family > 6) {
+    if (cpu_x86_support_mca_broadcast(env)) {
         for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) {
             if (cenv == env) {
                 continue;
             }
-            kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
-                               MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
+            kvm_inject_x86_mce_on(cenv, &mce, ABORT_ON_ERROR);
         }
     }
 }
+
+static void kvm_mce_inj_srar_dataload(CPUState *env, target_phys_addr_t paddr)
+{
+    struct kvm_x86_mce mce = {
+        .bank = 9,
+        .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+                  | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+                  | MCI_STATUS_AR | 0x134,
+        .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV,
+        .addr = paddr,
+        .misc = (MCM_ADDR_PHYS << 6) | 0xc,
+    };
+    int r;
+
+    r = kvm_set_mce(env, &mce);
+    if (r < 0) {
+        fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
+        abort();
+    }
+    kvm_mce_broadcast_rest(env);
+}
+
+static void kvm_mce_inj_srao_memscrub(CPUState *env, target_phys_addr_t paddr)
+{
+    struct kvm_x86_mce mce = {
+        .bank = 9,
+        .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+                  | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+                  | 0xc0,
+        .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV,
+        .addr = paddr,
+        .misc = (MCM_ADDR_PHYS << 6) | 0xc,
+    };
+    int r;
+
+    r = kvm_set_mce(env, &mce);
+    if (r < 0) {
+        fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
+        abort();
+    }
+    kvm_mce_broadcast_rest(env);
+}
+
+static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr)
+{
+    struct kvm_x86_mce mce = {
+        .bank = 9,
+        .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+                  | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+                  | 0xc0,
+        .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV,
+        .addr = paddr,
+        .misc = (MCM_ADDR_PHYS << 6) | 0xc,
+    };
+
+    kvm_inject_x86_mce_on(env, &mce, ABORT_ON_ERROR);
+    kvm_mce_broadcast_rest(env);
+}
+
 #endif
 
 int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
 {
 #if defined(KVM_CAP_MCE)
-    struct kvm_x86_mce mce = {
-        .bank = 9,
-    };
     void *vaddr;
     ram_addr_t ram_addr;
     target_phys_addr_t paddr;
-    int r;
 
     if ((env->mcg_cap & MCG_SER_P) && addr
         && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
-        if (code == BUS_MCEERR_AR) {
-            /* Fake an Intel architectural Data Load SRAR UCR */
-            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
-                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
-                | MCI_STATUS_AR | 0x134;
-            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
-            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
-        } else {
-            /*
-             * If there is an MCE exception being processed, ignore
-             * this SRAO MCE
-             */
-            r = kvm_mce_in_exception(env);
-            if (r == -1) {
-                fprintf(stderr, "Failed to get MCE status\n");
-            } else if (r) {
-                return 0;
-            }
-            /* Fake an Intel architectural Memory scrubbing UCR */
-            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
-                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
-                | 0xc0;
-            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
-            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
-        }
         vaddr = (void *)addr;
         if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
             !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) {
@@ -1766,13 +1861,20 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
                 hardware_memory_error();
             }
         }
-        mce.addr = paddr;
-        r = kvm_set_mce(env, &mce);
-        if (r < 0) {
-            fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
-            abort();
+
+        if (code == BUS_MCEERR_AR) {
+            /* Fake an Intel architectural Data Load SRAR UCR */
+            kvm_mce_inj_srar_dataload(env, paddr);
+        } else {
+            /*
+             * If there is an MCE exception being processed, ignore
+             * this SRAO MCE
+             */
+            if (!kvm_mce_in_progress(env)) {
+                /* Fake an Intel architectural Memory scrubbing UCR */
+                kvm_mce_inj_srao_memscrub(env, paddr);
+            }
         }
-        kvm_mce_broadcast_rest(env);
     } else
 #endif
     {
@@ -1791,7 +1893,6 @@ int kvm_on_sigbus(int code, void *addr)
 {
 #if defined(KVM_CAP_MCE)
     if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
-        uint64_t status;
        void *vaddr;
        ram_addr_t ram_addr;
        target_phys_addr_t paddr;
@@ -1804,13 +1905,7 @@ int kvm_on_sigbus(int code, void *addr)
                     "QEMU itself instead of guest system!: %p\n", addr);
             return 0;
         }
-        status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
-            | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
-            | 0xc0;
-        kvm_inject_x86_mce(first_cpu, 9, status,
-                           MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
-                           (MCM_ADDR_PHYS << 6) | 0xc, 1);
-        kvm_mce_broadcast_rest(first_cpu);
+        kvm_mce_inj_srao_memscrub2(first_cpu, paddr);
     } else
 #endif
     {
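
The new helpers above fake two classes of recoverable machine checks after translating the faulting host address back to a guest physical address: an "action required" data-load error (SRAR, MCACOD 0x134) for BUS_MCEERR_AR, and an "action optional" memory-scrub error (SRAO, MCACOD 0xc0) for BUS_MCEERR_AO. The sketch below shows how those MCi_STATUS words are composed; the bit positions are restated from the Intel SDM so it is self-contained, rather than taken from QEMU's headers:

    #include <stdint.h>
    #include <stdio.h>

    /* MCi_STATUS bits, per the Intel SDM machine-check chapter. */
    #define MCI_STATUS_VAL   (1ULL << 63)   /* register holds valid data */
    #define MCI_STATUS_UC    (1ULL << 61)   /* uncorrected error */
    #define MCI_STATUS_EN    (1ULL << 60)   /* error signaling enabled */
    #define MCI_STATUS_MISCV (1ULL << 59)   /* MISC register valid */
    #define MCI_STATUS_ADDRV (1ULL << 58)   /* ADDR register valid */
    #define MCI_STATUS_S     (1ULL << 56)   /* signaled as MCE */
    #define MCI_STATUS_AR    (1ULL << 55)   /* software action required */

    int main(void)
    {
        uint64_t common = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
                        | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
        uint64_t srar = common | MCI_STATUS_AR | 0x134; /* data load, must act */
        uint64_t srao = common | 0xc0;                  /* scrub hit, may ignore */

        printf("SRAR status 0x%016llx\n", (unsigned long long)srar);
        printf("SRAO status 0x%016llx\n", (unsigned long long)srao);
        return 0;
    }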
diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h
index 04932cf4c8..9d7b584267 100644
--- a/target-i386/kvm_x86.h
+++ b/target-i386/kvm_x86.h
@@ -15,8 +15,11 @@
 #ifndef __KVM_X86_H__
 #define __KVM_X86_H__
 
+#define ABORT_ON_ERROR 0x01
+#define MCE_BROADCAST 0x02
+
 void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
                         uint64_t mcg_status, uint64_t addr, uint64_t misc,
-                        int abort_on_error);
+                        int flag);
 
 #endif
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 5caa07cba8..710eca1dca 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -37,6 +37,10 @@
 do { } while (0)
 #endif
 
+const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
+    KVM_CAP_LAST_INFO
+};
+
 static int cap_interrupt_unset = false;
 static int cap_interrupt_level = false;
 
@@ -56,7 +60,7 @@ static void kvm_kick_env(void *env)
     qemu_cpu_kick(env);
 }
 
-int kvm_arch_init(KVMState *s, int smp_cpus)
+int kvm_arch_init(KVMState *s)
 {
 #ifdef KVM_CAP_PPC_UNSET_IRQ
     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
@@ -307,6 +311,10 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
         dprintf("handle halt\n");
         ret = kvmppc_handle_halt(env);
         break;
+    default:
+        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
+        ret = -1;
+        break;
     }
 
     return ret;
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index adf4a9e1e5..38823f54f7 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -70,7 +70,11 @@
 #define SCLP_CMDW_READ_SCP_INFO 0x00020001
 #define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001
 
-int kvm_arch_init(KVMState *s, int smp_cpus)
+const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
+    KVM_CAP_LAST_INFO
+};
+
+int kvm_arch_init(KVMState *s)
 {
     return 0;
 }
diff --git a/vl.c b/vl.c
@@ -2860,7 +2860,7 @@ int main(int argc, char **argv, char **envp)
     }
 
     if (kvm_allowed) {
-        int ret = kvm_init(smp_cpus);
+        int ret = kvm_init();
         if (ret < 0) {
             if (!kvm_available()) {
                 printf("KVM not supported for this target\n");
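
The kvm_x86.h hunk turns kvm_inject_x86_mce's trailing boolean into a small flags word, so ABORT_ON_ERROR and MCE_BROADCAST can be requested independently. A stub showing only that flag-handling pattern (inject_mce_stub is a hypothetical name; the real handling lives in target-i386/kvm.c):

    #include <stdio.h>

    #define ABORT_ON_ERROR 0x01
    #define MCE_BROADCAST  0x02

    static void inject_mce_stub(int flag)
    {
        if (flag & ABORT_ON_ERROR) {
            printf("would abort() if the injection ioctl failed\n");
        }
        if (flag & MCE_BROADCAST) {
            printf("would replay the MCE on every other vCPU\n");
        }
    }

    int main(void)
    {
        inject_mce_stub(ABORT_ON_ERROR | MCE_BROADCAST);
        return 0;
    }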
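
The target-ppc and target-s390x hunks each export a sentinel-terminated kvm_arch_required_capabilities[] table, letting generic code verify one flat list of KVM extensions instead of hard-coding per-target checks (the empty tables here just say "nothing beyond the common set"). A sketch of how such a table is walked; KVMCapabilityInfo and the KVM_CAP_INFO/KVM_CAP_LAST_INFO macros mirror QEMU's, while the capability numbers and the check_extension() stub are stand-ins:

    #include <stdio.h>

    typedef struct KVMCapabilityInfo {
        const char *name;
        int value;
    } KVMCapabilityInfo;

    #define KVM_CAP_INFO(CAP) { "KVM_CAP_" #CAP, KVM_CAP_##CAP }
    #define KVM_CAP_LAST_INFO { NULL, 0 }

    /* stand-in capability numbers, for the sketch only */
    #define KVM_CAP_USER_MEMORY 3
    #define KVM_CAP_SYNC_MMU    16

    static const KVMCapabilityInfo required_caps[] = {
        KVM_CAP_INFO(USER_MEMORY),
        KVM_CAP_INFO(SYNC_MMU),
        KVM_CAP_LAST_INFO
    };

    /* pretend KVM_CHECK_EXTENSION ioctl: report everything as present */
    static int check_extension(int cap)
    {
        (void)cap;
        return 1;
    }

    int main(void)
    {
        const KVMCapabilityInfo *ci;

        for (ci = required_caps; ci->name != NULL; ci++) {
            if (!check_extension(ci->value)) {
                fprintf(stderr, "kvm: missing capability %s\n", ci->name);
                return 1;
            }
        }
        printf("all required KVM capabilities present\n");
        return 0;
    }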