2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
50 #include "sysemu/kvm_int.h"
54 #ifdef DEBUG_KVM
55 #define DPRINTF(fmt, ...) \
56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #else
58 #define DPRINTF(fmt, ...) \
59 do { } while (0)
60 #endif
62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_smt_possible;
74 static int cap_spapr_tce;
75 static int cap_spapr_tce_64;
76 static int cap_spapr_multitce;
77 static int cap_spapr_vfio;
79 static int cap_one_reg;
81 static int cap_ppc_watchdog;
83 static int cap_htab_fd;
84 static int cap_fixup_hcalls;
85 static int cap_htm; /* Hardware transactional memory support */
86 static int cap_mmu_radix;
87 static int cap_mmu_hash_v3;
88 static int cap_resize_hpt;
89 static int cap_ppc_pvr_compat;
90 static int cap_ppc_safe_cache;
91 static int cap_ppc_safe_bounds_check;
92 static int cap_ppc_safe_indirect_branch;
93 static int cap_ppc_count_cache_flush_assist;
94 static int cap_ppc_nested_kvm_hv;
95 static int cap_large_decr;
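/* Trap opcode KVM uses for software breakpoints; fetched from
 * KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu(). */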
97 static uint32_t debug_inst_opcode;
99 /* XXX We have a race condition where we actually have a level triggered
100 * interrupt, but the infrastructure can't expose that yet, so the guest
101 * takes but ignores it, goes to sleep and never gets notified that there's
102 * still an interrupt pending.
104 * As a quick workaround, let's just wake up again 20 ms after we injected
105 * an interrupt. That way we can ensure that we're always reinjecting
106 * interrupts in case the guest swallowed them.
108 static QEMUTimer *idle_timer;
110 static void kvm_kick_cpu(void *opaque)
112 PowerPCCPU *cpu = opaque;
114 qemu_cpu_kick(CPU(cpu));
117 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
118 * should only be used for fallback tests - generally we should use
119 * explicit capabilities for the features we want, rather than
120 * assuming what is/isn't available depending on the KVM variant. */
121 static bool kvmppc_is_pr(KVMState *ks)
123 /* Assume KVM-PR if the GET_PVINFO capability is available */
124 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
127 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
128 static void kvmppc_get_cpu_characteristics(KVMState *s);
129 static int kvmppc_get_dec_bits(void);
131 int kvm_arch_init(MachineState *ms, KVMState *s)
133 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
134 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
135 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
136 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
137 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
138 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
139 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
140 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
141 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
142 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
143 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
144 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
145 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
146 /* Note: we don't set cap_papr here, because this capability is
147 * only activated after this by kvmppc_set_papr() */
148 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
149 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
150 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
151 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
152 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
153 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
154 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
155 kvmppc_get_cpu_characteristics(s);
156 cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
157 cap_large_decr = kvmppc_get_dec_bits();
159 * Note: setting it to false because there is no such capability
160 * in KVM at this moment.
162 * TODO: call kvm_vm_check_extension() with the right capability
163 * after the kernel starts implementing it.*/
164 cap_ppc_pvr_compat = false;
166 if (!cap_interrupt_level) {
167 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
168 "VM to stall at times!\n");
171 kvm_ppc_register_host_cpu_type(ms);
176 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
181 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
183 CPUPPCState *cenv = &cpu->env;
184 CPUState *cs = CPU(cpu);
185 struct kvm_sregs sregs;
188 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
189 /* What we're really trying to say is "if we're on BookE, we use
190 the native PVR for now". This is the only sane way to check
191 it though, so we may mislead users into thinking they can run
192 BookE guests on BookS. Let's hope nobody dares enough :) */
196 fprintf(stderr, "kvm error: missing PVR setting capability\n");
201 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
206 sregs.pvr = cenv->spr[SPR_PVR];
207 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
210 /* Set up a shared TLB array with KVM */
211 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
213 CPUPPCState *env = &cpu->env;
214 CPUState *cs = CPU(cpu);
215 struct kvm_book3e_206_tlb_params params = {};
216 struct kvm_config_tlb cfg = {};
217 unsigned int entries = 0;
220 if (!kvm_enabled() ||
221 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
225 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
227 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
228 params.tlb_sizes[i] = booke206_tlb_size(env, i);
229 params.tlb_ways[i] = booke206_tlb_ways(env, i);
230 entries += params.tlb_sizes[i];
233 assert(entries == env->nb_tlb);
234 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
236 env->tlb_dirty = true;
238 cfg.array = (uintptr_t)env->tlb.tlbm;
239 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
240 cfg.params = (uintptr_t)&params;
241 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
243 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
245 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
246 __func__, strerror(-ret));
250 env->kvm_sw_tlb = true;
255 #if defined(TARGET_PPC64)
256 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
260 assert(kvm_state != NULL);
262 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
263 error_setg(errp, "KVM doesn't expose the MMU features it supports");
264 error_append_hint(errp, "Consider switching to a newer KVM\n");
268 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
273 error_setg_errno(errp, -ret,
274 "KVM failed to provide the MMU features it supports");
277 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
279 KVMState *s = KVM_STATE(current_machine->accelerator);
280 struct ppc_radix_page_info *radix_page_info;
281 struct kvm_ppc_rmmu_info rmmu_info;
284 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
287 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
290 radix_page_info = g_malloc0(sizeof(*radix_page_info));
291 radix_page_info->count = 0;
292 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
293 if (rmmu_info.ap_encodings[i]) {
294 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
295 radix_page_info->count++;
298 return radix_page_info;
301 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
302 bool radix, bool gtse,
305 CPUState *cs = CPU(cpu);
308 struct kvm_ppc_mmuv3_cfg cfg = {
309 .process_table = proc_tbl,
313 flags |= KVM_PPC_MMUV3_RADIX;
316 flags |= KVM_PPC_MMUV3_GTSE;
319 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
326 return H_NOT_AVAILABLE;
332 bool kvmppc_hpt_needs_host_contiguous_pages(void)
334 static struct kvm_ppc_smmu_info smmu_info;
336 if (!kvm_enabled()) {
340 kvm_get_smmu_info(&smmu_info, &error_fatal);
341 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
344 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
346 struct kvm_ppc_smmu_info smmu_info;
348 Error *local_err = NULL;
350 /* For now, we only have anything to check on hash64 MMUs */
351 if (!cpu->hash64_opts || !kvm_enabled()) {
355 kvm_get_smmu_info(&smmu_info, &local_err);
357 error_propagate(errp, local_err);
361 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
362 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
364 "KVM does not support 1TiB segments which guest expects");
368 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
369 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
370 smmu_info.slb_size, cpu->hash64_opts->slb_size);
375 * Verify that every pagesize supported by the cpu model is
376 * supported by KVM with the same encodings
378 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
379 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
380 struct kvm_ppc_one_seg_page_size *ksps;
382 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
383 if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
387 if (ik >= ARRAY_SIZE(smmu_info.sps)) {
388 error_setg(errp, "KVM doesn't support base page shift %u",
393 ksps = &smmu_info.sps[ik];
394 if (ksps->slb_enc != qsps->slb_enc) {
396 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
397 ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
401 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
402 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
403 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
408 if (jk >= ARRAY_SIZE(ksps->enc)) {
409 error_setg(errp, "KVM doesn't support page shift %u/%u",
410 qsps->enc[jq].page_shift, qsps->page_shift);
413 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
415 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
416 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
417 qsps->page_shift, qsps->enc[jq].pte_enc);
423 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
424 /* Mostly, the guest pagesizes we can use are related to the
425 * host pages used to map guest RAM, which is handled in the
426 * platform code. Cache-Inhibited largepages (64k) however are
427 * used for I/O, so if they're mapped to the host at all it
428 * will be a normal mapping, not a special hugepage one used
430 if (getpagesize() < 0x10000) {
432 "KVM can't supply 64kiB CI pages, which guest expects");
436 #endif /* !defined (TARGET_PPC64) */
438 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
440 return POWERPC_CPU(cpu)->vcpu_id;
443 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
444 * book3s supports only 1 watchpoint, so array size
445 * of 4 is sufficient for now.
447 #define MAX_HW_BKPTS 4
449 static struct HWBreakpoint {
452 } hw_debug_points[MAX_HW_BKPTS];
454 static CPUWatchpoint hw_watchpoint;
456 /* By default, no breakpoints or watchpoints are supported */
457 static int max_hw_breakpoint;
458 static int max_hw_watchpoint;
459 static int nb_hw_breakpoint;
460 static int nb_hw_watchpoint;
462 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
464 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
465 max_hw_breakpoint = 2;
466 max_hw_watchpoint = 2;
469 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
470 fprintf(stderr, "Error initializing h/w breakpoints\n");
475 int kvm_arch_init_vcpu(CPUState *cs)
477 PowerPCCPU *cpu = POWERPC_CPU(cs);
478 CPUPPCState *cenv = &cpu->env;
481 /* Synchronize sregs with kvm */
482 ret = kvm_arch_sync_sregs(cpu);
484 if (ret == -EINVAL) {
485 error_report("Register sync failed... If you're using kvm-hv.ko,"
486 " only \"-cpu host\" is possible");
491 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
493 switch (cenv->mmu_model) {
494 case POWERPC_MMU_BOOKE206:
495 /* This target supports access to KVM's guest TLB */
496 ret = kvm_booke206_tlb_init(cpu);
498 case POWERPC_MMU_2_07:
499 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
500 /* KVM-HV has transactional memory on POWER8 even without the
501 * KVM_CAP_PPC_HTM extension, so enable it here instead as
502 * long as it's available to userspace on the host. */
503 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
512 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
513 kvmppc_hw_debug_points_init(cenv);
518 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
520 CPUPPCState *env = &cpu->env;
521 CPUState *cs = CPU(cpu);
522 struct kvm_dirty_tlb dirty_tlb;
523 unsigned char *bitmap;
526 if (!env->kvm_sw_tlb) {
530 bitmap = g_malloc((env->nb_tlb + 7) / 8);
531 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
533 dirty_tlb.bitmap = (uintptr_t)bitmap;
534 dirty_tlb.num_dirty = env->nb_tlb;
536 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
538 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
539 __func__, strerror(-ret));
545 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
547 PowerPCCPU *cpu = POWERPC_CPU(cs);
548 CPUPPCState *env = &cpu->env;
553 struct kvm_one_reg reg = {
555 .addr = (uintptr_t) &val,
559 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
561 trace_kvm_failed_spr_get(spr, strerror(errno));
563 switch (id & KVM_REG_SIZE_MASK) {
564 case KVM_REG_SIZE_U32:
565 env->spr[spr] = val.u32;
568 case KVM_REG_SIZE_U64:
569 env->spr[spr] = val.u64;
573 /* Don't handle this size yet */
579 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
581 PowerPCCPU *cpu = POWERPC_CPU(cs);
582 CPUPPCState *env = &cpu->env;
587 struct kvm_one_reg reg = {
589 .addr = (uintptr_t) &val,
593 switch (id & KVM_REG_SIZE_MASK) {
594 case KVM_REG_SIZE_U32:
595 val.u32 = env->spr[spr];
598 case KVM_REG_SIZE_U64:
599 val.u64 = env->spr[spr];
603 /* Don't handle this size yet */
607 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
609 trace_kvm_failed_spr_set(spr, strerror(errno));
613 static int kvm_put_fp(CPUState *cs)
615 PowerPCCPU *cpu = POWERPC_CPU(cs);
616 CPUPPCState *env = &cpu->env;
617 struct kvm_one_reg reg;
621 if (env->insns_flags & PPC_FLOAT) {
622 uint64_t fpscr = env->fpscr;
623 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
625 reg.id = KVM_REG_PPC_FPSCR;
626 reg.addr = (uintptr_t)&fpscr;
627 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
629 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
633 for (i = 0; i < 32; i++) {
635 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
636 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
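/* Assemble the 128-bit VSR image for KVM: the FPR supplies one
 * doubleword, and the pair is ordered to match host endianness. */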
638 #ifdef HOST_WORDS_BIGENDIAN
639 vsr[0] = float64_val(*fpr);
643 vsr[1] = float64_val(*fpr);
645 reg.addr = (uintptr_t) &vsr;
646 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
648 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
650 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
657 if (env->insns_flags & PPC_ALTIVEC) {
658 reg.id = KVM_REG_PPC_VSCR;
659 reg.addr = (uintptr_t)&env->vscr;
660 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
662 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
666 for (i = 0; i < 32; i++) {
667 reg.id = KVM_REG_PPC_VR(i);
668 reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
669 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
671 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
680 static int kvm_get_fp(CPUState *cs)
682 PowerPCCPU *cpu = POWERPC_CPU(cs);
683 CPUPPCState *env = &cpu->env;
684 struct kvm_one_reg reg;
688 if (env->insns_flags & PPC_FLOAT) {
690 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
692 reg.id = KVM_REG_PPC_FPSCR;
693 reg.addr = (uintptr_t)&fpscr;
694 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
696 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
702 for (i = 0; i < 32; i++) {
704 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
705 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
707 reg.addr = (uintptr_t) &vsr;
708 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
710 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
712 DPRINTF("Unable to get %s%d from KVM: %s\n",
713 vsx ? "VSR" : "FPR", i, strerror(errno));
716 #ifdef HOST_WORDS_BIGENDIAN
731 if (env->insns_flags & PPC_ALTIVEC) {
732 reg.id = KVM_REG_PPC_VSCR;
733 reg.addr = (uintptr_t)&env->vscr;
734 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
736 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
740 for (i = 0; i < 32; i++) {
741 reg.id = KVM_REG_PPC_VR(i);
742 reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
743 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
745 DPRINTF("Unable to get VR%d from KVM: %s\n",
755 #if defined(TARGET_PPC64)
756 static int kvm_get_vpa(CPUState *cs)
758 PowerPCCPU *cpu = POWERPC_CPU(cs);
759 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
760 struct kvm_one_reg reg;
763 reg.id = KVM_REG_PPC_VPA_ADDR;
764 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
765 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
767 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
771 assert((uintptr_t)&spapr_cpu->slb_shadow_size
772 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
773 reg.id = KVM_REG_PPC_VPA_SLB;
774 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
775 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
777 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
782 assert((uintptr_t)&spapr_cpu->dtl_size
783 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
784 reg.id = KVM_REG_PPC_VPA_DTL;
785 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
786 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
788 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
796 static int kvm_put_vpa(CPUState *cs)
798 PowerPCCPU *cpu = POWERPC_CPU(cs);
799 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
800 struct kvm_one_reg reg;
803 /* SLB shadow or DTL can't be registered unless a master VPA is
804 * registered. That means when restoring state, if a VPA *is*
805 * registered, we need to set that up first. If not, we need to
806 * deregister the others before deregistering the master VPA */
807 assert(spapr_cpu->vpa_addr
808 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
810 if (spapr_cpu->vpa_addr) {
811 reg.id = KVM_REG_PPC_VPA_ADDR;
812 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
813 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
815 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
820 assert((uintptr_t)&spapr_cpu->slb_shadow_size
821 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
822 reg.id = KVM_REG_PPC_VPA_SLB;
823 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
824 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
826 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
830 assert((uintptr_t)&spapr_cpu->dtl_size
831 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
832 reg.id = KVM_REG_PPC_VPA_DTL;
833 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
834 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
836 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
841 if (!spapr_cpu->vpa_addr) {
842 reg.id = KVM_REG_PPC_VPA_ADDR;
843 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
844 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
846 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
853 #endif /* TARGET_PPC64 */
855 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
857 CPUPPCState *env = &cpu->env;
858 struct kvm_sregs sregs;
861 sregs.pvr = env->spr[SPR_PVR];
864 PPCVirtualHypervisorClass *vhc =
865 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
866 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
868 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
873 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
874 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
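/* For valid entries KVM expects the SLB index in the low bits of the
 * slbe (RB) value, matching the layout slbmte takes. */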
875 if (env->slb[i].esid & SLB_ESID_V) {
876 sregs.u.s.ppc64.slb[i].slbe |= i;
878 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
883 for (i = 0; i < 16; i++) {
884 sregs.u.s.ppc32.sr[i] = env->sr[i];
888 for (i = 0; i < 8; i++) {
889 /* Beware. We have to swap the upper and lower 32-bit halves here */
890 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
892 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
896 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
899 int kvm_arch_put_registers(CPUState *cs, int level)
901 PowerPCCPU *cpu = POWERPC_CPU(cs);
902 CPUPPCState *env = &cpu->env;
903 struct kvm_regs regs;
907 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
914 regs.xer = cpu_read_xer(env);
918 regs.srr0 = env->spr[SPR_SRR0];
919 regs.srr1 = env->spr[SPR_SRR1];
921 regs.sprg0 = env->spr[SPR_SPRG0];
922 regs.sprg1 = env->spr[SPR_SPRG1];
923 regs.sprg2 = env->spr[SPR_SPRG2];
924 regs.sprg3 = env->spr[SPR_SPRG3];
925 regs.sprg4 = env->spr[SPR_SPRG4];
926 regs.sprg5 = env->spr[SPR_SPRG5];
927 regs.sprg6 = env->spr[SPR_SPRG6];
928 regs.sprg7 = env->spr[SPR_SPRG7];
930 regs.pid = env->spr[SPR_BOOKE_PID];
932 for (i = 0;i < 32; i++)
933 regs.gpr[i] = env->gpr[i];
936 for (i = 0; i < 8; i++) {
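/* Pack the eight 4-bit CR fields into regs.cr, with crf[0] occupying
 * the highest nibble (bits 31..28). */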
937 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
940 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
946 if (env->tlb_dirty) {
948 env->tlb_dirty = false;
951 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
952 ret = kvmppc_put_books_sregs(cpu);
958 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
959 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
965 /* We deliberately ignore errors here: for kernels which have
966 * the ONE_REG calls but don't support the specific
967 * registers, there's a reasonable chance things will still
968 * work, at least until we try to migrate. */
969 for (i = 0; i < 1024; i++) {
970 uint64_t id = env->spr_cb[i].one_reg_id;
973 kvm_put_one_spr(cs, id, i);
979 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
980 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
982 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
991 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
992 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
993 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
994 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
998 if (kvm_put_vpa(cs) < 0) {
999 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1004 #endif /* TARGET_PPC64 */
1010 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1012 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1015 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1017 CPUPPCState *env = &cpu->env;
1018 struct kvm_sregs sregs;
1021 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1026 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1027 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1028 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1029 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1030 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1031 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1032 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1033 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1034 env->spr[SPR_DECR] = sregs.u.e.dec;
1035 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1036 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1037 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1040 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1041 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1042 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1043 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1044 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1045 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1048 if (sregs.u.e.features & KVM_SREGS_E_64) {
1049 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1052 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1053 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1056 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1057 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1058 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1059 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1060 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1061 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1062 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1063 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1064 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1065 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1066 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1067 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1068 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1069 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1070 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1071 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1072 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1073 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1074 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1075 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1076 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1077 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1078 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1079 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1080 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1081 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1082 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1083 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1084 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1085 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1086 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1087 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1088 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1090 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1091 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1092 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1093 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1094 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1095 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1096 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1099 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1100 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1101 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1104 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1105 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1106 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1107 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1108 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1112 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1113 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1114 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1115 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1116 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1117 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1118 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1119 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1120 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1121 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1122 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1125 if (sregs.u.e.features & KVM_SREGS_EXP) {
1126 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1129 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1130 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1131 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1134 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1135 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1136 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1137 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1139 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1140 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1141 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1148 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1150 CPUPPCState *env = &cpu->env;
1151 struct kvm_sregs sregs;
1155 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1161 ppc_store_sdr1(env, sregs.u.s.sdr1);
1167 * The packed SLB array we get from KVM_GET_SREGS only contains
1168 * information about valid entries. So we flush our internal copy
1169 * to get rid of stale ones, then put all valid SLB entries back
1172 memset(env->slb, 0, sizeof(env->slb));
1173 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1174 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1175 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1177 * Only restore valid entries
1179 if (rb & SLB_ESID_V) {
1180 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1186 for (i = 0; i < 16; i++) {
1187 env->sr[i] = sregs.u.s.ppc32.sr[i];
1191 for (i = 0; i < 8; i++) {
1192 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1193 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1194 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1195 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1201 int kvm_arch_get_registers(CPUState *cs)
1203 PowerPCCPU *cpu = POWERPC_CPU(cs);
1204 CPUPPCState *env = &cpu->env;
1205 struct kvm_regs regs;
1209 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1214 for (i = 7; i >= 0; i--) {
1215 env->crf[i] = cr & 15;
1219 env->ctr = regs.ctr;
1221 cpu_write_xer(env, regs.xer);
1222 env->msr = regs.msr;
1225 env->spr[SPR_SRR0] = regs.srr0;
1226 env->spr[SPR_SRR1] = regs.srr1;
1228 env->spr[SPR_SPRG0] = regs.sprg0;
1229 env->spr[SPR_SPRG1] = regs.sprg1;
1230 env->spr[SPR_SPRG2] = regs.sprg2;
1231 env->spr[SPR_SPRG3] = regs.sprg3;
1232 env->spr[SPR_SPRG4] = regs.sprg4;
1233 env->spr[SPR_SPRG5] = regs.sprg5;
1234 env->spr[SPR_SPRG6] = regs.sprg6;
1235 env->spr[SPR_SPRG7] = regs.sprg7;
1237 env->spr[SPR_BOOKE_PID] = regs.pid;
1239 for (i = 0;i < 32; i++)
1240 env->gpr[i] = regs.gpr[i];
1244 if (cap_booke_sregs) {
1245 ret = kvmppc_get_booke_sregs(cpu);
1252 ret = kvmppc_get_books_sregs(cpu);
1259 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1265 /* We deliberately ignore errors here: for kernels which have
1266 * the ONE_REG calls but don't support the specific
1267 * registers, there's a reasonable chance things will still
1268 * work, at least until we try to migrate. */
1269 for (i = 0; i < 1024; i++) {
1270 uint64_t id = env->spr_cb[i].one_reg_id;
1273 kvm_get_one_spr(cs, id, i);
1279 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1280 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1282 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1288 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1291 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1292 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1293 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1294 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1298 if (kvm_get_vpa(cs) < 0) {
1299 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1303 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1310 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1312 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1314 if (irq != PPC_INTERRUPT_EXT) {
1318 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1322 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1327 #if defined(TARGET_PPC64)
1328 #define PPC_INPUT_INT PPC970_INPUT_INT
1330 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1333 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1335 PowerPCCPU *cpu = POWERPC_CPU(cs);
1336 CPUPPCState *env = &cpu->env;
1340 qemu_mutex_lock_iothread();
1342 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1343 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1344 if (!cap_interrupt_level &&
1345 run->ready_for_interrupt_injection &&
1346 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1347 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1349 /* For now KVM disregards the 'irq' argument. However, in the
1350 * future KVM could cache it in-kernel to avoid a heavyweight exit
1351 * when reading the UIC.
1353 irq = KVM_INTERRUPT_SET;
1355 DPRINTF("injected interrupt %d\n", irq);
1356 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1358 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1361 /* Always wake up soon in case the interrupt was level based */
1362 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1363 (NANOSECONDS_PER_SECOND / 50));
1366 /* We don't know if there are more interrupts pending after this. However,
1367 * the guest will return to userspace in the course of handling this one
1368 * anyway, so we will get a chance to deliver the rest. */
1370 qemu_mutex_unlock_iothread();
1373 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1375 return MEMTXATTRS_UNSPECIFIED;
1378 int kvm_arch_process_async_events(CPUState *cs)
1383 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1385 CPUState *cs = CPU(cpu);
1386 CPUPPCState *env = &cpu->env;
1388 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1390 cs->exception_index = EXCP_HLT;
1396 /* Map DCR accesses to the existing QEMU DCR emulation */
1397 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1399 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1400 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1405 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1407 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1408 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1413 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1415 /* Mixed endian case is not handled */
1416 uint32_t sc = debug_inst_opcode;
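/* Save the original instruction and patch in the trap opcode that KVM
 * reported via KVM_REG_PPC_DEBUG_INST. */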
1418 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1420 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1427 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1431 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1432 sc != debug_inst_opcode ||
1433 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1441 static int find_hw_breakpoint(target_ulong addr, int type)
1445 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1446 <= ARRAY_SIZE(hw_debug_points));
1448 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1449 if (hw_debug_points[n].addr == addr &&
1450 hw_debug_points[n].type == type) {
1458 static int find_hw_watchpoint(target_ulong addr, int *flag)
1462 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1464 *flag = BP_MEM_ACCESS;
1468 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1470 *flag = BP_MEM_WRITE;
1474 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1476 *flag = BP_MEM_READ;
1483 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1484 target_ulong len, int type)
1486 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1490 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1491 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1494 case GDB_BREAKPOINT_HW:
1495 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1499 if (find_hw_breakpoint(addr, type) >= 0) {
1506 case GDB_WATCHPOINT_WRITE:
1507 case GDB_WATCHPOINT_READ:
1508 case GDB_WATCHPOINT_ACCESS:
1509 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1513 if (find_hw_breakpoint(addr, type) >= 0) {
1527 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1528 target_ulong len, int type)
1532 n = find_hw_breakpoint(addr, type);
1538 case GDB_BREAKPOINT_HW:
1542 case GDB_WATCHPOINT_WRITE:
1543 case GDB_WATCHPOINT_READ:
1544 case GDB_WATCHPOINT_ACCESS:
1551 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1556 void kvm_arch_remove_all_hw_breakpoints(void)
1558 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1561 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1565 /* Software Breakpoint updates */
1566 if (kvm_sw_breakpoints_active(cs)) {
1567 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1570 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1571 <= ARRAY_SIZE(hw_debug_points));
1572 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1574 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1575 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1576 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1577 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1578 switch (hw_debug_points[n].type) {
1579 case GDB_BREAKPOINT_HW:
1580 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1582 case GDB_WATCHPOINT_WRITE:
1583 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1585 case GDB_WATCHPOINT_READ:
1586 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1588 case GDB_WATCHPOINT_ACCESS:
1589 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1590 KVMPPC_DEBUG_WATCH_READ;
1593 cpu_abort(cs, "Unsupported breakpoint type\n");
1595 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1600 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1602 CPUState *cs = CPU(cpu);
1603 CPUPPCState *env = &cpu->env;
1604 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1609 if (cs->singlestep_enabled) {
1611 } else if (arch_info->status) {
1612 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1613 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1614 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1618 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1619 KVMPPC_DEBUG_WATCH_WRITE)) {
1620 n = find_hw_watchpoint(arch_info->address, &flag);
1623 cs->watchpoint_hit = &hw_watchpoint;
1624 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1625 hw_watchpoint.flags = flag;
1629 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1632 /* QEMU is not able to handle this debug exception, so inject a
1633 * program exception into the guest;
1634 * Yes, a program exception, NOT a debug exception !!
1635 * When QEMU is using the debug resources, the debug exception must
1636 * always be enabled. To achieve this we set MSR_DE and also set
1637 * MSRP_DEP so the guest cannot change MSR_DE.
1638 * When emulating debug resources for the guest we want the guest
1639 * to control MSR_DE (enable/disable the debug interrupt as needed).
1640 * Supporting both configurations at once is NOT possible.
1641 * So the result is that we cannot share debug resources
1642 * between QEMU and the guest on the BOOKE architecture.
1643 * In the current design QEMU gets priority over the guest:
1644 * if QEMU is using the debug resources, then the guest
1645 * cannot use them.
1646 * For software breakpoints QEMU uses a privileged instruction,
1647 * so there is no way we can be here because the guest set a
1648 * debug exception; the only possibility is that the guest executed
1649 * a privileged / illegal instruction, and that's why we are
1650 * injecting a program interrupt.
1653 cpu_synchronize_state(cs);
1654 /* env->nip is PC, so increment this by 4 to use
1655 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1658 cs->exception_index = POWERPC_EXCP_PROGRAM;
1659 env->error_code = POWERPC_EXCP_INVAL;
1660 ppc_cpu_do_interrupt(cs);
1666 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1668 PowerPCCPU *cpu = POWERPC_CPU(cs);
1669 CPUPPCState *env = &cpu->env;
1672 qemu_mutex_lock_iothread();
1674 switch (run->exit_reason) {
1676 if (run->dcr.is_write) {
1677 DPRINTF("handle dcr write\n");
1678 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1680 DPRINTF("handle dcr read\n");
1681 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1685 DPRINTF("handle halt\n");
1686 ret = kvmppc_handle_halt(cpu);
1688 #if defined(TARGET_PPC64)
1689 case KVM_EXIT_PAPR_HCALL:
1690 DPRINTF("handle PAPR hypercall\n");
1691 run->papr_hcall.ret = spapr_hypercall(cpu,
1693 run->papr_hcall.args);
1698 DPRINTF("handle epr\n");
1699 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1702 case KVM_EXIT_WATCHDOG:
1703 DPRINTF("handle watchdog expiry\n");
1704 watchdog_perform_action();
1708 case KVM_EXIT_DEBUG:
1709 DPRINTF("handle debug exception\n");
1710 if (kvm_handle_debug(cpu, run)) {
1714 /* re-enter, this exception was guest-internal */
1719 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1724 qemu_mutex_unlock_iothread();
1728 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1730 CPUState *cs = CPU(cpu);
1731 uint32_t bits = tsr_bits;
1732 struct kvm_one_reg reg = {
1733 .id = KVM_REG_PPC_OR_TSR,
1734 .addr = (uintptr_t) &bits,
1737 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1740 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1743 CPUState *cs = CPU(cpu);
1744 uint32_t bits = tsr_bits;
1745 struct kvm_one_reg reg = {
1746 .id = KVM_REG_PPC_CLEAR_TSR,
1747 .addr = (uintptr_t) &bits,
1750 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1753 int kvmppc_set_tcr(PowerPCCPU *cpu)
1755 CPUState *cs = CPU(cpu);
1756 CPUPPCState *env = &cpu->env;
1757 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1759 struct kvm_one_reg reg = {
1760 .id = KVM_REG_PPC_TCR,
1761 .addr = (uintptr_t) &tcr,
1764 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1767 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1769 CPUState *cs = CPU(cpu);
1772 if (!kvm_enabled()) {
1776 if (!cap_ppc_watchdog) {
1777 printf("warning: KVM does not support watchdog\n");
1781 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1783 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1784 __func__, strerror(-ret));
1791 static int read_cpuinfo(const char *field, char *value, int len)
1795 int field_len = strlen(field);
1798 f = fopen("/proc/cpuinfo", "r");
1804 if (!fgets(line, sizeof(line), f)) {
1807 if (!strncmp(line, field, field_len)) {
1808 pstrcpy(value, len, line);
1819 uint32_t kvmppc_get_tbfreq(void)
1823 uint32_t retval = NANOSECONDS_PER_SECOND;
1825 if (read_cpuinfo("timebase", line, sizeof(line))) {
1829 if (!(ns = strchr(line, ':'))) {
1838 bool kvmppc_get_host_serial(char **value)
1840 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1844 bool kvmppc_get_host_model(char **value)
1846 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1849 /* Try to find a device tree node for a CPU with clock-frequency property */
1850 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1852 struct dirent *dirp;
1855 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1856 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1861 while ((dirp = readdir(dp)) != NULL) {
1863 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1865 f = fopen(buf, "r");
1867 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1874 if (buf[0] == '\0') {
1875 printf("Unknown host!\n");
1882 static uint64_t kvmppc_read_int_dt(const char *filename)
1891 f = fopen(filename, "rb");
1896 len = fread(&u, 1, sizeof(u), f);
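/* Device tree integer properties are stored big-endian. */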
1900 /* property is a 32-bit quantity */
1901 return be32_to_cpu(u.v32);
1903 return be64_to_cpu(u.v64);
1909 /* Read a CPU node property from the host device tree that's a single
1910 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1911 * (can't find or open the property, or doesn't understand the
1912 * format). */
1913 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1915 char buf[PATH_MAX], *tmp;
1918 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1922 tmp = g_strdup_printf("%s/%s", buf, propname);
1923 val = kvmppc_read_int_dt(tmp);
1929 uint64_t kvmppc_get_clockfreq(void)
1931 return kvmppc_read_int_cpu_dt("clock-frequency");
1934 static int kvmppc_get_dec_bits(void)
1936 int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
1944 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1946 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1947 CPUState *cs = CPU(cpu);
1949 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1950 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1957 int kvmppc_get_hasidle(CPUPPCState *env)
1959 struct kvm_ppc_pvinfo pvinfo;
1961 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1962 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1969 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1971 uint32_t *hc = (uint32_t*)buf;
1972 struct kvm_ppc_pvinfo pvinfo;
1974 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1975 memcpy(buf, pvinfo.hcall, buf_len);
1980 * Fallback to always fail hypercalls regardless of endianness:
1982 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1984 * b .+8 (becomes nop in wrong endian)
1985 * bswap32(li r3, -1)
1988 hc[0] = cpu_to_be32(0x08000048);
1989 hc[1] = cpu_to_be32(0x3860ffff);
1990 hc[2] = cpu_to_be32(0x48000008);
1991 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1996 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1998 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2001 void kvmppc_enable_logical_ci_hcalls(void)
2004 * FIXME: it would be nice if we could detect the cases where
2005 * we're using a device which requires the in-kernel
2006 * implementation of these hcalls, but the kernel lacks them, and
2007 * produce a warning.
2009 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2010 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2013 void kvmppc_enable_set_mode_hcall(void)
2015 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2018 void kvmppc_enable_clear_ref_mod_hcalls(void)
2020 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2021 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2024 void kvmppc_set_papr(PowerPCCPU *cpu)
2026 CPUState *cs = CPU(cpu);
2029 if (!kvm_enabled()) {
2033 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2035 error_report("This vCPU type or KVM version does not support PAPR");
2039 /* Update the capability flag so we sync the right information
2040 * with KVM */
2044 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2046 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2049 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2051 CPUState *cs = CPU(cpu);
2054 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2055 if (ret && mpic_proxy) {
2056 error_report("This KVM version does not support EPR");
2061 int kvmppc_smt_threads(void)
2063 return cap_ppc_smt ? cap_ppc_smt : 1;
2066 int kvmppc_set_smt_threads(int smt)
2070 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2077 void kvmppc_hint_smt_possible(Error **errp)
2083 assert(kvm_enabled());
2084 if (cap_ppc_smt_possible) {
2085 g = g_string_new("Available VSMT modes:");
2086 for (i = 63; i >= 0; i--) {
2087 if ((1UL << i) & cap_ppc_smt_possible) {
2088 g_string_append_printf(g, " %lu", (1UL << i));
2091 s = g_string_free(g, false);
2092 error_append_hint(errp, "%s.\n", s);
2095 error_append_hint(errp,
2096 "This KVM seems to be too old to support VSMT.\n");
2102 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2104 struct kvm_ppc_smmu_info info;
2105 long rampagesize, best_page_shift;
2108 /* Find the largest hardware supported page size that's less than
2109 * or equal to the (logical) backing page size of guest RAM */
2110 kvm_get_smmu_info(&info, &error_fatal);
2111 rampagesize = qemu_getrampagesize();
2112 best_page_shift = 0;
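/* The hash table has 2^(hash_shift - 7) HPTE groups (one group is 2^7
 * bytes); at one mappable page per group, the RMA is capped below at
 * 2^(best_page_shift + hash_shift - 7) bytes. */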
2114 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2115 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2117 if (!sps->page_shift) {
2121 if ((sps->page_shift > best_page_shift)
2122 && ((1UL << sps->page_shift) <= rampagesize)) {
2123 best_page_shift = sps->page_shift;
2127 return MIN(current_size,
2128 1ULL << (best_page_shift + hash_shift - 7));
2132 bool kvmppc_spapr_use_multitce(void)
2134 return cap_spapr_multitce;
2137 int kvmppc_spapr_enable_inkernel_multitce(void)
2141 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2142 H_PUT_TCE_INDIRECT, 1);
2144 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2151 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2152 uint64_t bus_offset, uint32_t nb_table,
2153 int *pfd, bool need_vfio)
2159 /* Must set fd to -1 so we don't try to munmap when called for
2160 * destroying the table, which the upper layers -will- do
2163 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2167 if (cap_spapr_tce_64) {
2168 struct kvm_create_spapr_tce_64 args = {
2170 .page_shift = page_shift,
2171 .offset = bus_offset >> page_shift,
2175 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2178 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2182 } else if (cap_spapr_tce) {
2183 uint64_t window_size = (uint64_t) nb_table << page_shift;
2184 struct kvm_create_spapr_tce args = {
2186 .window_size = window_size,
2188 if ((window_size != args.window_size) || bus_offset) {
2191 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2193 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2201 len = nb_table * sizeof(uint64_t);
2202 /* FIXME: round this up to page size */
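/* Map the kernel-managed TCE table so QEMU can read and update the
 * entries directly. */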
2204 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2205 if (table == MAP_FAILED) {
2206 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2216 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2224 len = nb_table * sizeof(uint64_t);
2225 if ((munmap(table, len) < 0) ||
2227 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2229 /* Leak the table */
2235 int kvmppc_reset_htab(int shift_hint)
2237 uint32_t shift = shift_hint;
2239 if (!kvm_enabled()) {
2240 /* Full emulation, tell caller to allocate htab itself */
2243 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2245 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2246 if (ret == -ENOTTY) {
2247 /* At least some versions of PR KVM advertise the
2248 * capability, but don't implement the ioctl(). Oops.
2249 * Return 0 so that we allocate the htab in qemu, as is
2250 * correct for PR. */
2252 } else if (ret < 0) {
2258 /* We have a kernel that predates the htab reset calls. For PR
2259 * KVM, we need to allocate the htab ourselves; an HV KVM of
2260 * this era has already allocated a 16MB fixed-size hash table. */
2261 if (kvmppc_is_pr(kvm_state)) {
2262 /* PR - tell caller to allocate htab */
2265 /* HV - assume 16MB kernel allocated htab */
2270 static inline uint32_t mfpvr(void)
2279 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2288 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2290 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2291 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2292 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2294 /* Now fix up the class with information we can query from the host */
2297 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2298 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2299 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2300 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2301 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2302 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2304 if (dcache_size != -1) {
2305 pcc->l1_dcache_size = dcache_size;
2308 if (icache_size != -1) {
2309 pcc->l1_icache_size = icache_size;
2312 #if defined(TARGET_PPC64)
2313 pcc->radix_page_info = kvm_get_radix_page_info();
2315 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2317 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2318 * compliant. More importantly, advertising ISA 3.00
2319 * architected mode may prevent guests from activating
2320 * necessary DD1 workarounds.
2322 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2323 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2325 #endif /* defined(TARGET_PPC64) */
2328 bool kvmppc_has_cap_epr(void)
2333 bool kvmppc_has_cap_fixup_hcalls(void)
2335 return cap_fixup_hcalls;
2338 bool kvmppc_has_cap_htm(void)
2343 bool kvmppc_has_cap_mmu_radix(void)
2345 return cap_mmu_radix;
2348 bool kvmppc_has_cap_mmu_hash_v3(void)
2350 return cap_mmu_hash_v3;
2353 static bool kvmppc_power8_host(void)
2358 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2359 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2360 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2361 (base_pvr == CPU_POWERPC_POWER8_BASE);
2363 #endif /* TARGET_PPC64 */
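/* Translate KVM_PPC_GET_CPU_CHAR results into sPAPR capability values. */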
2367 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2369 bool l1d_thread_priv_req = !kvmppc_power8_host();
2371 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2373 } else if ((!l1d_thread_priv_req ||
2374 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2375 (c.character & c.character_mask
2376 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2383 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2385 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2387 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2394 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2396 if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
2397 (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
2398 (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
2399 return SPAPR_CAP_FIXED_NA;
2400 } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
2401 return SPAPR_CAP_WORKAROUND;
2402 } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2403 return SPAPR_CAP_FIXED_CCD;
2404 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2405 return SPAPR_CAP_FIXED_IBS;
2411 static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
2413 if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
2419 static void kvmppc_get_cpu_characteristics(KVMState *s)
2421 struct kvm_ppc_cpu_char c;
2425 cap_ppc_safe_cache = 0;
2426 cap_ppc_safe_bounds_check = 0;
2427 cap_ppc_safe_indirect_branch = 0;
2429 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2433 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2438 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2439 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2440 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2441 cap_ppc_count_cache_flush_assist =
2442 parse_cap_ppc_count_cache_flush_assist(c);
2445 int kvmppc_get_cap_safe_cache(void)
2447 return cap_ppc_safe_cache;
2450 int kvmppc_get_cap_safe_bounds_check(void)
2452 return cap_ppc_safe_bounds_check;
2455 int kvmppc_get_cap_safe_indirect_branch(void)
2457 return cap_ppc_safe_indirect_branch;
2460 int kvmppc_get_cap_count_cache_flush_assist(void)
2462 return cap_ppc_count_cache_flush_assist;
2465 bool kvmppc_has_cap_nested_kvm_hv(void)
2467 return !!cap_ppc_nested_kvm_hv;
2470 int kvmppc_set_cap_nested_kvm_hv(int enable)
2472 return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2475 bool kvmppc_has_cap_spapr_vfio(void)
2477 return cap_spapr_vfio;
2480 int kvmppc_get_cap_large_decr(void)
2482 return cap_large_decr;
2485 int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
2487 CPUState *cs = CPU(cpu);
2490 kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2491 /* Do we need to modify the LPCR? */
2492 if (!!(lpcr & LPCR_LD) != !!enable) {
2498 kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2499 kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2501 if (!!(lpcr & LPCR_LD) != !!enable) {
2509 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2511 uint32_t host_pvr = mfpvr();
2512 PowerPCCPUClass *pvr_pcc;
2514 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2515 if (pvr_pcc == NULL) {
2516 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }
    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = { .token = token };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }
    strncpy(args.name, function, sizeof(args.name));
    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
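
/*
 * Obtain a file descriptor from KVM for streaming the guest hash page table
 * (HPT), read-only for migration save or writable for restore. The stream
 * is a sequence of kvm_get_htab_header records, each followed by n_valid
 * HPTEs of HASH_PTE_SIZE_64 bytes.
 */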
int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = index,
    };
    int ret;

    if (!cap_htab_fd) {
        error_setg(errp, "KVM version doesn't support %s the HPT",
                   write ? "writing" : "reading");
        return -ENOTSUP;
    }
    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
    if (ret < 0) {
        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
                   write ? "writing" : "reading", write ? "to" : "from",
                   strerror(errno));
        return -errno;
    }
    return ret;
}

int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0) ||
                 ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
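
/*
 * Feed one chunk of HPT data, in the format produced by kvmppc_save_htab()
 * above, back to the kernel through a write-enabled HTAB fd.
 */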
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}
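
/*
 * Read n HPTEs starting at ptex from KVM into hptes, expanding any
 * n_invalid runs in the stream into zeroed entries.
 */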
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }
        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }
            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;
            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;
            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }
    close(fd);
}
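
/* Update a single HPTE in the kernel HPT through a write-enabled HTAB fd. */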
void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }
    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
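
/*
 * Have KVM handle the H_RANDOM hypercall in the kernel (backed by the host
 * hardware RNG) when KVM_CAP_PPC_HWRNG is available.
 */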
int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }
    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}
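
/*
 * HPT resizing: the guest drives a resize with the H_RESIZE_HPT_PREPARE and
 * H_RESIZE_HPT_COMMIT hypercalls, which QEMU forwards to the kernel via the
 * KVM_PPC_RESIZE_HPT_* ioctls below when cap_resize_hpt is advertised.
 */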
void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }
    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }
    /* Otherwise fallback on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}
int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = { .flags = flags, .shift = shift };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }
    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = { .flags = flags, .shift = shift };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }
    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}

/*
 * This is a helper function to detect a post migration scenario
 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true. The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We need to avoid as much as possible querying the running KVM type
 * at the QEMU level.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }
    if (cap_ppc_pvr_compat) {
        return false;
    }
    return !kvmppc_is_pr(cs->kvm_state);
}
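
/*
 * Report the vCPU's online state to KVM via KVM_REG_PPC_ONLINE, a hint the
 * kernel can use when dispatching guest threads; a no-op without KVM.
 */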
void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
{
    CPUState *cs = CPU(cpu);

    if (kvm_enabled()) {
        kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);