2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
51 #include "sysemu/kvm_int.h"
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) do { } while (0)
#endif
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
80 static int cap_one_reg;
82 static int cap_ppc_watchdog;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
94 static int cap_ppc_nested_kvm_hv;
96 static uint32_t debug_inst_opcode;
98 /* XXX We have a race condition where we actually have a level triggered
99 * interrupt, but the infrastructure can't expose that yet, so the guest
100 * takes but ignores it, goes to sleep and never gets notified that there's
101 * still an interrupt pending.
* As a quick workaround, let's just wake up again 20 ms after we injected
* an interrupt. That way we can ensure that we're always reinjecting
* interrupts in case the guest swallowed them.
107 static QEMUTimer *idle_timer;
109 static void kvm_kick_cpu(void *opaque)
111 PowerPCCPU *cpu = opaque;
113 qemu_cpu_kick(CPU(cpu));
116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
117 * should only be used for fallback tests - generally we should use
118 * explicit capabilities for the features we want, rather than
119 * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
122 /* Assume KVM-PR if the GET_PVINFO capability is available */
123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 static void kvmppc_get_cpu_characteristics(KVMState *s);
129 int kvm_arch_init(MachineState *ms, KVMState *s)
131 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
132 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
133 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
134 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
135 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
136 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
137 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
138 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
139 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
140 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
141 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
142 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
143 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
144 /* Note: we don't set cap_papr here, because this capability is
145 * only activated after this by kvmppc_set_papr() */
146 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
147 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
148 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
149 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
150 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
151 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
152 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
153 kvmppc_get_cpu_characteristics(s);
154 cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
/*
 * Note: setting it to false because there is no such capability
 * in KVM at this moment.
 *
 * TODO: call kvm_vm_check_extension() with the right capability
 * after the kernel starts implementing it.
 */
161 cap_ppc_pvr_compat = false;
163 if (!cap_interrupt_level) {
164 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
165 "VM to stall at times!\n");
168 kvm_ppc_register_host_cpu_type(ms);
173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
180 CPUPPCState *cenv = &cpu->env;
181 CPUState *cs = CPU(cpu);
182 struct kvm_sregs sregs;
185 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
/* What we're really trying to say is "if we're on BookE, we use
   the native PVR for now". This is the only sane way to check it,
   though it may mislead users into thinking they can run BookE
   guests on BookS. Let's hope nobody dares try :) */
193 fprintf(stderr, "kvm error: missing PVR setting capability\n");
198 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
203 sregs.pvr = cenv->spr[SPR_PVR];
204 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
207 /* Set up a shared TLB array with KVM */
208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
210 CPUPPCState *env = &cpu->env;
211 CPUState *cs = CPU(cpu);
212 struct kvm_book3e_206_tlb_params params = {};
213 struct kvm_config_tlb cfg = {};
214 unsigned int entries = 0;
217 if (!kvm_enabled() ||
218 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
222 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
224 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
225 params.tlb_sizes[i] = booke206_tlb_size(env, i);
226 params.tlb_ways[i] = booke206_tlb_ways(env, i);
227 entries += params.tlb_sizes[i];
230 assert(entries == env->nb_tlb);
231 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
233 env->tlb_dirty = true;
235 cfg.array = (uintptr_t)env->tlb.tlbm;
236 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
cfg.params = (uintptr_t)&params;
238 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
240 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
242 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
243 __func__, strerror(-ret));
247 env->kvm_sw_tlb = true;
252 #if defined(TARGET_PPC64)
253 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
257 assert(kvm_state != NULL);
259 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
260 error_setg(errp, "KVM doesn't expose the MMU features it supports");
261 error_append_hint(errp, "Consider switching to a newer KVM\n");
265 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
270 error_setg_errno(errp, -ret,
271 "KVM failed to provide the MMU features it supports");
274 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
276 KVMState *s = KVM_STATE(current_machine->accelerator);
277 struct ppc_radix_page_info *radix_page_info;
278 struct kvm_ppc_rmmu_info rmmu_info;
281 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
284 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
287 radix_page_info = g_malloc0(sizeof(*radix_page_info));
288 radix_page_info->count = 0;
289 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
290 if (rmmu_info.ap_encodings[i]) {
291 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
292 radix_page_info->count++;
295 return radix_page_info;
298 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
299 bool radix, bool gtse,
302 CPUState *cs = CPU(cpu);
305 struct kvm_ppc_mmuv3_cfg cfg = {
306 .process_table = proc_tbl,
310 flags |= KVM_PPC_MMUV3_RADIX;
313 flags |= KVM_PPC_MMUV3_GTSE;
316 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
323 return H_NOT_AVAILABLE;
329 bool kvmppc_hpt_needs_host_contiguous_pages(void)
331 static struct kvm_ppc_smmu_info smmu_info;
333 if (!kvm_enabled()) {
337 kvm_get_smmu_info(&smmu_info, &error_fatal);
338 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
341 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
343 struct kvm_ppc_smmu_info smmu_info;
345 Error *local_err = NULL;
347 /* For now, we only have anything to check on hash64 MMUs */
348 if (!cpu->hash64_opts || !kvm_enabled()) {
352 kvm_get_smmu_info(&smmu_info, &local_err);
354 error_propagate(errp, local_err);
358 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
359 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
361 "KVM does not support 1TiB segments which guest expects");
365 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
366 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
367 smmu_info.slb_size, cpu->hash64_opts->slb_size);
372 * Verify that every pagesize supported by the cpu model is
373 * supported by KVM with the same encodings
375 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
376 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
377 struct kvm_ppc_one_seg_page_size *ksps;
379 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
380 if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
384 if (ik >= ARRAY_SIZE(smmu_info.sps)) {
error_setg(errp, "KVM doesn't support base page shift %u",
390 ksps = &smmu_info.sps[ik];
391 if (ksps->slb_enc != qsps->slb_enc) {
393 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
394 ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
398 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
399 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
400 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
405 if (jk >= ARRAY_SIZE(ksps->enc)) {
406 error_setg(errp, "KVM doesn't support page shift %u/%u",
407 qsps->enc[jq].page_shift, qsps->page_shift);
410 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
412 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
413 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
414 qsps->page_shift, qsps->enc[jq].pte_enc);
420 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
421 /* Mostly what guest pagesizes we can use are related to the
422 * host pages used to map guest RAM, which is handled in the
423 * platform code. Cache-Inhibited largepages (64k) however are
424 * used for I/O, so if they're mapped to the host at all it
425 * will be a normal mapping, not a special hugepage one used
427 if (getpagesize() < 0x10000) {
429 "KVM can't supply 64kiB CI pages, which guest expects");
#endif /* defined(TARGET_PPC64) */
435 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
437 return POWERPC_CPU(cpu)->vcpu_id;
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
444 #define MAX_HW_BKPTS 4
446 static struct HWBreakpoint {
449 } hw_debug_points[MAX_HW_BKPTS];
451 static CPUWatchpoint hw_watchpoint;
/* By default no breakpoints or watchpoints are supported */
454 static int max_hw_breakpoint;
455 static int max_hw_watchpoint;
456 static int nb_hw_breakpoint;
457 static int nb_hw_watchpoint;
459 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
461 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
462 max_hw_breakpoint = 2;
463 max_hw_watchpoint = 2;
466 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
467 fprintf(stderr, "Error initializing h/w breakpoints\n");
472 int kvm_arch_init_vcpu(CPUState *cs)
474 PowerPCCPU *cpu = POWERPC_CPU(cs);
475 CPUPPCState *cenv = &cpu->env;
478 /* Synchronize sregs with kvm */
479 ret = kvm_arch_sync_sregs(cpu);
481 if (ret == -EINVAL) {
482 error_report("Register sync failed... If you're using kvm-hv.ko,"
483 " only \"-cpu host\" is possible");
488 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
490 switch (cenv->mmu_model) {
491 case POWERPC_MMU_BOOKE206:
492 /* This target supports access to KVM's guest TLB */
493 ret = kvm_booke206_tlb_init(cpu);
495 case POWERPC_MMU_2_07:
496 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
/* KVM-HV has transactional memory on POWER8 even without the
 * KVM_CAP_PPC_HTM extension, so enable it here instead, as
 * long as it's available to userspace on the host. */
500 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
509 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
510 kvmppc_hw_debug_points_init(cenv);
515 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
517 CPUPPCState *env = &cpu->env;
518 CPUState *cs = CPU(cpu);
519 struct kvm_dirty_tlb dirty_tlb;
520 unsigned char *bitmap;
523 if (!env->kvm_sw_tlb) {
527 bitmap = g_malloc((env->nb_tlb + 7) / 8);
528 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
530 dirty_tlb.bitmap = (uintptr_t)bitmap;
531 dirty_tlb.num_dirty = env->nb_tlb;
533 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
535 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
536 __func__, strerror(-ret));
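/* Read a single SPR from KVM through the ONE_REG interface and store it in
 * env->spr[spr]. A failure is only traced: a kernel that lacks a particular
 * register is not fatal here. */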
542 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
544 PowerPCCPU *cpu = POWERPC_CPU(cs);
545 CPUPPCState *env = &cpu->env;
550 struct kvm_one_reg reg = {
552 .addr = (uintptr_t) &val,
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
558 trace_kvm_failed_spr_get(spr, strerror(errno));
560 switch (id & KVM_REG_SIZE_MASK) {
561 case KVM_REG_SIZE_U32:
562 env->spr[spr] = val.u32;
565 case KVM_REG_SIZE_U64:
566 env->spr[spr] = val.u64;
570 /* Don't handle this size yet */
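/* Counterpart of kvm_get_one_spr(): push env->spr[spr] to KVM via ONE_REG. */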
576 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
578 PowerPCCPU *cpu = POWERPC_CPU(cs);
579 CPUPPCState *env = &cpu->env;
584 struct kvm_one_reg reg = {
586 .addr = (uintptr_t) &val,
590 switch (id & KVM_REG_SIZE_MASK) {
591 case KVM_REG_SIZE_U32:
592 val.u32 = env->spr[spr];
595 case KVM_REG_SIZE_U64:
596 val.u64 = env->spr[spr];
600 /* Don't handle this size yet */
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
606 trace_kvm_failed_spr_set(spr, strerror(errno));
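/* Copy the floating point, VSX and Altivec state from QEMU to KVM. Each FPR
 * occupies half of the corresponding VSX register, so FPRs and VSRs are
 * transferred through the same two-doubleword buffer below. */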
610 static int kvm_put_fp(CPUState *cs)
612 PowerPCCPU *cpu = POWERPC_CPU(cs);
613 CPUPPCState *env = &cpu->env;
614 struct kvm_one_reg reg;
618 if (env->insns_flags & PPC_FLOAT) {
619 uint64_t fpscr = env->fpscr;
620 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
622 reg.id = KVM_REG_PPC_FPSCR;
623 reg.addr = (uintptr_t)&fpscr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
626 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
630 for (i = 0; i < 32; i++) {
632 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
633 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
635 #ifdef HOST_WORDS_BIGENDIAN
636 vsr[0] = float64_val(*fpr);
640 vsr[1] = float64_val(*fpr);
642 reg.addr = (uintptr_t) &vsr;
643 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
647 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
654 if (env->insns_flags & PPC_ALTIVEC) {
655 reg.id = KVM_REG_PPC_VSCR;
656 reg.addr = (uintptr_t)&env->vscr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
659 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
663 for (i = 0; i < 32; i++) {
664 reg.id = KVM_REG_PPC_VR(i);
665 reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
668 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
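/* Mirror of kvm_put_fp(): read the FP/VSX and Altivec state back from KVM. */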
677 static int kvm_get_fp(CPUState *cs)
679 PowerPCCPU *cpu = POWERPC_CPU(cs);
680 CPUPPCState *env = &cpu->env;
681 struct kvm_one_reg reg;
685 if (env->insns_flags & PPC_FLOAT) {
687 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
689 reg.id = KVM_REG_PPC_FPSCR;
690 reg.addr = (uintptr_t)&fpscr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
693 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
699 for (i = 0; i < 32; i++) {
701 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
702 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
704 reg.addr = (uintptr_t) &vsr;
705 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
709 DPRINTF("Unable to get %s%d from KVM: %s\n",
710 vsx ? "VSR" : "FPR", i, strerror(errno));
713 #ifdef HOST_WORDS_BIGENDIAN
728 if (env->insns_flags & PPC_ALTIVEC) {
729 reg.id = KVM_REG_PPC_VSCR;
730 reg.addr = (uintptr_t)&env->vscr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
733 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
737 for (i = 0; i < 32; i++) {
738 reg.id = KVM_REG_PPC_VR(i);
739 reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
742 DPRINTF("Unable to get VR%d from KVM: %s\n",
752 #if defined(TARGET_PPC64)
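/* The VPA (Virtual Processor Area), SLB shadow buffer and dispatch trace log
 * are per-vCPU areas that a PAPR guest registers with the hypervisor; the two
 * helpers below save and restore their registration state through ONE_REG. */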
753 static int kvm_get_vpa(CPUState *cs)
755 PowerPCCPU *cpu = POWERPC_CPU(cs);
756 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
757 struct kvm_one_reg reg;
760 reg.id = KVM_REG_PPC_VPA_ADDR;
761 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
764 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
768 assert((uintptr_t)&spapr_cpu->slb_shadow_size
769 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
770 reg.id = KVM_REG_PPC_VPA_SLB;
771 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
774 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
779 assert((uintptr_t)&spapr_cpu->dtl_size
780 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
781 reg.id = KVM_REG_PPC_VPA_DTL;
782 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
785 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
793 static int kvm_put_vpa(CPUState *cs)
795 PowerPCCPU *cpu = POWERPC_CPU(cs);
796 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
797 struct kvm_one_reg reg;
800 /* SLB shadow or DTL can't be registered unless a master VPA is
801 * registered. That means when restoring state, if a VPA *is*
802 * registered, we need to set that up first. If not, we need to
803 * deregister the others before deregistering the master VPA */
804 assert(spapr_cpu->vpa_addr
805 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
807 if (spapr_cpu->vpa_addr) {
808 reg.id = KVM_REG_PPC_VPA_ADDR;
809 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
812 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
817 assert((uintptr_t)&spapr_cpu->slb_shadow_size
818 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
819 reg.id = KVM_REG_PPC_VPA_SLB;
820 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
823 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
827 assert((uintptr_t)&spapr_cpu->dtl_size
828 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
829 reg.id = KVM_REG_PPC_VPA_DTL;
830 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
833 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
838 if (!spapr_cpu->vpa_addr) {
839 reg.id = KVM_REG_PPC_VPA_ADDR;
840 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
843 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
850 #endif /* TARGET_PPC64 */
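/* Push the Book3S MMU state to KVM: PVR, SDR1 (or the HPT location encoded by
 * the virtual hypervisor for KVM-PR), SLB entries, segment registers and BATs. */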
852 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
854 CPUPPCState *env = &cpu->env;
855 struct kvm_sregs sregs;
858 sregs.pvr = env->spr[SPR_PVR];
861 PPCVirtualHypervisorClass *vhc =
862 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
863 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
865 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
870 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
871 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
872 if (env->slb[i].esid & SLB_ESID_V) {
873 sregs.u.s.ppc64.slb[i].slbe |= i;
875 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
880 for (i = 0; i < 16; i++) {
881 sregs.u.s.ppc32.sr[i] = env->sr[i];
885 for (i = 0; i < 8; i++) {
/* Beware. We have to swap the upper and lower 32-bit halves here */
887 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
889 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
893 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
896 int kvm_arch_put_registers(CPUState *cs, int level)
898 PowerPCCPU *cpu = POWERPC_CPU(cs);
899 CPUPPCState *env = &cpu->env;
900 struct kvm_regs regs;
ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
911 regs.xer = cpu_read_xer(env);
915 regs.srr0 = env->spr[SPR_SRR0];
916 regs.srr1 = env->spr[SPR_SRR1];
918 regs.sprg0 = env->spr[SPR_SPRG0];
919 regs.sprg1 = env->spr[SPR_SPRG1];
920 regs.sprg2 = env->spr[SPR_SPRG2];
921 regs.sprg3 = env->spr[SPR_SPRG3];
922 regs.sprg4 = env->spr[SPR_SPRG4];
923 regs.sprg5 = env->spr[SPR_SPRG5];
924 regs.sprg6 = env->spr[SPR_SPRG6];
925 regs.sprg7 = env->spr[SPR_SPRG7];
927 regs.pid = env->spr[SPR_BOOKE_PID];
for (i = 0; i < 32; i++)
930 regs.gpr[i] = env->gpr[i];
933 for (i = 0; i < 8; i++) {
934 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
943 if (env->tlb_dirty) {
945 env->tlb_dirty = false;
948 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
949 ret = kvmppc_put_books_sregs(cpu);
955 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
956 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
/* We deliberately ignore errors here: for kernels which have
 * the ONE_REG calls but don't support the specific
 * registers, there's a reasonable chance things will still
 * work, at least until we try to migrate. */
966 for (i = 0; i < 1024; i++) {
967 uint64_t id = env->spr_cb[i].one_reg_id;
970 kvm_put_one_spr(cs, id, i);
976 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
977 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
979 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
980 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
984 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
991 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
995 if (kvm_put_vpa(cs) < 0) {
996 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1000 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1001 #endif /* TARGET_PPC64 */
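/* BookE: recompute one of QEMU's exception vectors from the IVOR value just
 * read back from KVM plus the IVPR base. */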
1007 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1009 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
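/* Read the BookE sregs from KVM and scatter them back into env->spr[],
 * updating the exception vectors as we go. Each feature block is optional
 * and only consumed when KVM advertises it. */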
1012 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1014 CPUPPCState *env = &cpu->env;
1015 struct kvm_sregs sregs;
1018 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1023 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1024 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1025 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1026 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1027 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1028 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1029 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1030 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1031 env->spr[SPR_DECR] = sregs.u.e.dec;
1032 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1033 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1034 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1037 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1038 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1039 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1040 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1041 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1042 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1045 if (sregs.u.e.features & KVM_SREGS_E_64) {
1046 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1049 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1050 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1053 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1054 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1055 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1056 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1057 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1058 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1059 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1060 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1061 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1062 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1063 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1064 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1065 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1066 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1067 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1068 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1069 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1070 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1071 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1072 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1073 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1074 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1075 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1076 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1077 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1078 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1079 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1080 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1081 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1082 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1083 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1084 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1085 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1087 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1088 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1089 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1090 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1091 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1092 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1093 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1096 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1097 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1098 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1101 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1102 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1103 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1104 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1105 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1109 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1110 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1111 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1112 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1113 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1114 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1115 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1116 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1117 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1118 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1119 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1122 if (sregs.u.e.features & KVM_SREGS_EXP) {
1123 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1126 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1127 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1128 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1131 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1132 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1133 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1134 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1136 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1137 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1138 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1145 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1147 CPUPPCState *env = &cpu->env;
1148 struct kvm_sregs sregs;
1152 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1158 ppc_store_sdr1(env, sregs.u.s.sdr1);
1164 * The packed SLB array we get from KVM_GET_SREGS only contains
1165 * information about valid entries. So we flush our internal copy
1166 * to get rid of stale ones, then put all valid SLB entries back
1169 memset(env->slb, 0, sizeof(env->slb));
1170 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1171 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1172 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1174 * Only restore valid entries
1176 if (rb & SLB_ESID_V) {
1177 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1183 for (i = 0; i < 16; i++) {
1184 env->sr[i] = sregs.u.s.ppc32.sr[i];
1188 for (i = 0; i < 8; i++) {
1189 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1190 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1191 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1192 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
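/* Main register sync out of KVM: GPRs, CR, LR/CTR/XER/MSR, the SPRG and SRR
 * registers, then BookE or Book3S sregs, and on 64-bit hosts the ONE_REG
 * SPRs, TM state, VPA registration and timebase offset as capabilities allow. */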
1198 int kvm_arch_get_registers(CPUState *cs)
1200 PowerPCCPU *cpu = POWERPC_CPU(cs);
1201 CPUPPCState *env = &cpu->env;
1202 struct kvm_regs regs;
ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1211 for (i = 7; i >= 0; i--) {
1212 env->crf[i] = cr & 15;
1216 env->ctr = regs.ctr;
1218 cpu_write_xer(env, regs.xer);
1219 env->msr = regs.msr;
1222 env->spr[SPR_SRR0] = regs.srr0;
1223 env->spr[SPR_SRR1] = regs.srr1;
1225 env->spr[SPR_SPRG0] = regs.sprg0;
1226 env->spr[SPR_SPRG1] = regs.sprg1;
1227 env->spr[SPR_SPRG2] = regs.sprg2;
1228 env->spr[SPR_SPRG3] = regs.sprg3;
1229 env->spr[SPR_SPRG4] = regs.sprg4;
1230 env->spr[SPR_SPRG5] = regs.sprg5;
1231 env->spr[SPR_SPRG6] = regs.sprg6;
1232 env->spr[SPR_SPRG7] = regs.sprg7;
1234 env->spr[SPR_BOOKE_PID] = regs.pid;
for (i = 0; i < 32; i++)
1237 env->gpr[i] = regs.gpr[i];
1241 if (cap_booke_sregs) {
1242 ret = kvmppc_get_booke_sregs(cpu);
1249 ret = kvmppc_get_books_sregs(cpu);
1256 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
/* We deliberately ignore errors here: for kernels which have
 * the ONE_REG calls but don't support the specific
 * registers, there's a reasonable chance things will still
 * work, at least until we try to migrate. */
1266 for (i = 0; i < 1024; i++) {
1267 uint64_t id = env->spr_cb[i].one_reg_id;
1270 kvm_get_one_spr(cs, id, i);
1276 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1277 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1279 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1280 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1282 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1284 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1288 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1291 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1295 if (kvm_get_vpa(cs) < 0) {
1296 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1300 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
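/* Raise or lower the external interrupt line of a vCPU. Only PPC_INTERRUPT_EXT
 * is handled here; everything else is left to the generic interrupt code. */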
1307 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1309 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1311 if (irq != PPC_INTERRUPT_EXT) {
1315 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1319 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1324 #if defined(TARGET_PPC64)
1325 #define PPC_INPUT_INT PPC970_INPUT_INT
1327 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1330 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1332 PowerPCCPU *cpu = POWERPC_CPU(cs);
1333 CPUPPCState *env = &cpu->env;
1337 qemu_mutex_lock_iothread();
1339 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1340 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1341 if (!cap_interrupt_level &&
1342 run->ready_for_interrupt_injection &&
1343 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1344 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1346 /* For now KVM disregards the 'irq' argument. However, in the
1347 * future KVM could cache it in-kernel to avoid a heavyweight exit
1348 * when reading the UIC.
1350 irq = KVM_INTERRUPT_SET;
1352 DPRINTF("injected interrupt %d\n", irq);
1353 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1355 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1358 /* Always wake up soon in case the interrupt was level based */
1359 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1360 (NANOSECONDS_PER_SECOND / 50));
/* We don't know if there are more interrupts pending after this. However,
 * the guest will return to userspace in the course of handling this one
 * anyway, so we will get a chance to deliver the rest. */
1367 qemu_mutex_unlock_iothread();
1370 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1372 return MEMTXATTRS_UNSPECIFIED;
1375 int kvm_arch_process_async_events(CPUState *cs)
1380 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1382 CPUState *cs = CPU(cpu);
1383 CPUPPCState *env = &cpu->env;
1385 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1387 cs->exception_index = EXCP_HLT;
/* Map DCR accesses to the existing QEMU DCR emulation */
1394 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1396 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1397 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1402 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1404 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1405 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1410 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1412 /* Mixed endian case is not handled */
1413 uint32_t sc = debug_inst_opcode;
1415 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1417 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1424 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1428 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1429 sc != debug_inst_opcode ||
1430 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
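/* Look up a hardware breakpoint or watchpoint by address and GDB type in the
 * hw_debug_points table; returns its index, or -1 if it is not registered. */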
1438 static int find_hw_breakpoint(target_ulong addr, int type)
1442 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1443 <= ARRAY_SIZE(hw_debug_points));
1445 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1446 if (hw_debug_points[n].addr == addr &&
1447 hw_debug_points[n].type == type) {
1455 static int find_hw_watchpoint(target_ulong addr, int *flag)
1459 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1461 *flag = BP_MEM_ACCESS;
1465 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1467 *flag = BP_MEM_WRITE;
1471 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1473 *flag = BP_MEM_READ;
1480 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1481 target_ulong len, int type)
1483 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1487 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1488 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1491 case GDB_BREAKPOINT_HW:
1492 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1496 if (find_hw_breakpoint(addr, type) >= 0) {
1503 case GDB_WATCHPOINT_WRITE:
1504 case GDB_WATCHPOINT_READ:
1505 case GDB_WATCHPOINT_ACCESS:
1506 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1510 if (find_hw_breakpoint(addr, type) >= 0) {
1524 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1525 target_ulong len, int type)
1529 n = find_hw_breakpoint(addr, type);
1535 case GDB_BREAKPOINT_HW:
1539 case GDB_WATCHPOINT_WRITE:
1540 case GDB_WATCHPOINT_READ:
1541 case GDB_WATCHPOINT_ACCESS:
1548 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1553 void kvm_arch_remove_all_hw_breakpoints(void)
1555 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1558 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1562 /* Software Breakpoint updates */
1563 if (kvm_sw_breakpoints_active(cs)) {
1564 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1567 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1568 <= ARRAY_SIZE(hw_debug_points));
1569 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1571 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1572 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1573 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1574 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1575 switch (hw_debug_points[n].type) {
1576 case GDB_BREAKPOINT_HW:
1577 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1579 case GDB_WATCHPOINT_WRITE:
1580 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1582 case GDB_WATCHPOINT_READ:
1583 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1585 case GDB_WATCHPOINT_ACCESS:
1586 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1587 KVMPPC_DEBUG_WATCH_READ;
1590 cpu_abort(cs, "Unsupported breakpoint type\n");
1592 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
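/* Handle a KVM_EXIT_DEBUG exit: decide whether the exception belongs to the
 * gdbstub (single-step, software or hardware breakpoint/watchpoint hit) or
 * must be reflected back into the guest as a program check. Returns non-zero
 * when control should go to the debugger. */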
1597 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1599 CPUState *cs = CPU(cpu);
1600 CPUPPCState *env = &cpu->env;
1601 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1606 if (cs->singlestep_enabled) {
1608 } else if (arch_info->status) {
1609 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1610 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1611 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1615 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1616 KVMPPC_DEBUG_WATCH_WRITE)) {
1617 n = find_hw_watchpoint(arch_info->address, &flag);
1620 cs->watchpoint_hit = &hw_watchpoint;
1621 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1622 hw_watchpoint.flags = flag;
1626 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
/* QEMU is not able to handle this debug exception, so inject a
 * program exception into the guest instead;
 * yes, a program exception, NOT a debug exception!
 * When QEMU is using the debug resources, the debug exception must
 * always be enabled. To achieve this we set MSR_DE and also set
 * MSRP_DEP so the guest cannot change MSR_DE.
 * When emulating debug resources for the guest, we want the guest
 * to control MSR_DE (enable/disable the debug interrupt on demand).
 * Supporting both configurations at once is NOT possible,
 * so debug resources cannot be shared
 * between QEMU and the guest on the BookE architecture.
 * In the current design QEMU gets priority over the guest:
 * if QEMU is using the debug resources, the guest cannot use them.
 * For software breakpoints QEMU uses a privileged instruction,
 * so there is no way we can be here because the guest set up a
 * debug exception; the only possibility is that the guest executed
 * a privileged / illegal instruction, and that is why we are
 * injecting a program interrupt. */
1650 cpu_synchronize_state(cs);
/* env->nip is PC, so increment this by 4 to use
 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1655 cs->exception_index = POWERPC_EXCP_PROGRAM;
1656 env->error_code = POWERPC_EXCP_INVAL;
1657 ppc_cpu_do_interrupt(cs);
1663 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1665 PowerPCCPU *cpu = POWERPC_CPU(cs);
1666 CPUPPCState *env = &cpu->env;
1669 qemu_mutex_lock_iothread();
1671 switch (run->exit_reason) {
1673 if (run->dcr.is_write) {
1674 DPRINTF("handle dcr write\n");
1675 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1677 DPRINTF("handle dcr read\n");
1678 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1682 DPRINTF("handle halt\n");
1683 ret = kvmppc_handle_halt(cpu);
1685 #if defined(TARGET_PPC64)
1686 case KVM_EXIT_PAPR_HCALL:
1687 DPRINTF("handle PAPR hypercall\n");
1688 run->papr_hcall.ret = spapr_hypercall(cpu,
1690 run->papr_hcall.args);
1695 DPRINTF("handle epr\n");
1696 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1699 case KVM_EXIT_WATCHDOG:
1700 DPRINTF("handle watchdog expiry\n");
1701 watchdog_perform_action();
1705 case KVM_EXIT_DEBUG:
1706 DPRINTF("handle debug exception\n");
1707 if (kvm_handle_debug(cpu, run)) {
1711 /* re-enter, this exception was guest-internal */
1716 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1721 qemu_mutex_unlock_iothread();
1725 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1727 CPUState *cs = CPU(cpu);
1728 uint32_t bits = tsr_bits;
1729 struct kvm_one_reg reg = {
1730 .id = KVM_REG_PPC_OR_TSR,
1731 .addr = (uintptr_t) &bits,
return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1737 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1740 CPUState *cs = CPU(cpu);
1741 uint32_t bits = tsr_bits;
1742 struct kvm_one_reg reg = {
1743 .id = KVM_REG_PPC_CLEAR_TSR,
1744 .addr = (uintptr_t) &bits,
return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1750 int kvmppc_set_tcr(PowerPCCPU *cpu)
1752 CPUState *cs = CPU(cpu);
1753 CPUPPCState *env = &cpu->env;
1754 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1756 struct kvm_one_reg reg = {
1757 .id = KVM_REG_PPC_TCR,
1758 .addr = (uintptr_t) &tcr,
return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1764 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1766 CPUState *cs = CPU(cpu);
1769 if (!kvm_enabled()) {
1773 if (!cap_ppc_watchdog) {
printf("warning: KVM does not support watchdog\n");
1778 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1780 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1781 __func__, strerror(-ret));
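/* Scan /proc/cpuinfo for a line starting with 'field' and copy it (truncated
 * to len bytes) into value. Returns non-zero if the field was found. */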
1788 static int read_cpuinfo(const char *field, char *value, int len)
1792 int field_len = strlen(field);
1795 f = fopen("/proc/cpuinfo", "r");
1801 if (!fgets(line, sizeof(line), f)) {
1804 if (!strncmp(line, field, field_len)) {
1805 pstrcpy(value, len, line);
1816 uint32_t kvmppc_get_tbfreq(void)
1820 uint32_t retval = NANOSECONDS_PER_SECOND;
1822 if (read_cpuinfo("timebase", line, sizeof(line))) {
1826 if (!(ns = strchr(line, ':'))) {
1835 bool kvmppc_get_host_serial(char **value)
1837 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1841 bool kvmppc_get_host_model(char **value)
1843 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1846 /* Try to find a device tree node for a CPU with clock-frequency property */
1847 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1849 struct dirent *dirp;
1852 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1853 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1858 while ((dirp = readdir(dp)) != NULL) {
1860 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1862 f = fopen(buf, "r");
1864 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1871 if (buf[0] == '\0') {
1872 printf("Unknown host!\n");
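/* Read a device tree property file from the host and interpret it as a
 * big-endian integer: 4 bytes yield a 32-bit value, 8 bytes a 64-bit value,
 * anything else 0. */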
1879 static uint64_t kvmppc_read_int_dt(const char *filename)
1888 f = fopen(filename, "rb");
1893 len = fread(&u, 1, sizeof(u), f);
1897 /* property is a 32-bit quantity */
1898 return be32_to_cpu(u.v32);
1900 return be64_to_cpu(u.v64);
1906 /* Read a CPU node property from the host device tree that's a single
1907 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1908 * (can't find or open the property, or doesn't understand the
1910 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1912 char buf[PATH_MAX], *tmp;
1915 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1919 tmp = g_strdup_printf("%s/%s", buf, propname);
1920 val = kvmppc_read_int_dt(tmp);
1926 uint64_t kvmppc_get_clockfreq(void)
1928 return kvmppc_read_int_cpu_dt("clock-frequency");
1931 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1933 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1934 CPUState *cs = CPU(cpu);
1936 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1937 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1944 int kvmppc_get_hasidle(CPUPPCState *env)
1946 struct kvm_ppc_pvinfo pvinfo;
1948 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1949 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1956 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1958 uint32_t *hc = (uint32_t*)buf;
1959 struct kvm_ppc_pvinfo pvinfo;
1961 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1962 memcpy(buf, pvinfo.hcall, buf_len);
1967 * Fallback to always fail hypercalls regardless of endianness:
1969 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1971 * b .+8 (becomes nop in wrong endian)
1972 * bswap32(li r3, -1)
1975 hc[0] = cpu_to_be32(0x08000048);
1976 hc[1] = cpu_to_be32(0x3860ffff);
1977 hc[2] = cpu_to_be32(0x48000008);
1978 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1983 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1985 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1988 void kvmppc_enable_logical_ci_hcalls(void)
/*
 * FIXME: it would be nice if we could detect the cases where
 * we're using a device which requires the in-kernel
 * implementation of these hcalls but the kernel lacks it, and
 * produce a warning.
 */
1996 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1997 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2000 void kvmppc_enable_set_mode_hcall(void)
2002 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2005 void kvmppc_enable_clear_ref_mod_hcalls(void)
2007 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2008 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2011 void kvmppc_set_papr(PowerPCCPU *cpu)
2013 CPUState *cs = CPU(cpu);
2016 if (!kvm_enabled()) {
2020 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2022 error_report("This vCPU type or KVM version does not support PAPR");
2026 /* Update the capability flag so we sync the right information
2031 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2033 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2036 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2038 CPUState *cs = CPU(cpu);
2041 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2042 if (ret && mpic_proxy) {
2043 error_report("This KVM version does not support EPR");
2048 int kvmppc_smt_threads(void)
2050 return cap_ppc_smt ? cap_ppc_smt : 1;
2053 int kvmppc_set_smt_threads(int smt)
2057 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2064 void kvmppc_hint_smt_possible(Error **errp)
2070 assert(kvm_enabled());
2071 if (cap_ppc_smt_possible) {
2072 g = g_string_new("Available VSMT modes:");
2073 for (i = 63; i >= 0; i--) {
2074 if ((1UL << i) & cap_ppc_smt_possible) {
2075 g_string_append_printf(g, " %lu", (1UL << i));
2078 s = g_string_free(g, false);
2079 error_append_hint(errp, "%s.\n", s);
2082 error_append_hint(errp,
2083 "This KVM seems to be too old to support VSMT.\n");
2089 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2091 struct kvm_ppc_smmu_info info;
2092 long rampagesize, best_page_shift;
2095 /* Find the largest hardware supported page size that's less than
2096 * or equal to the (logical) backing page size of guest RAM */
2097 kvm_get_smmu_info(&info, &error_fatal);
2098 rampagesize = qemu_getrampagesize();
2099 best_page_shift = 0;
2101 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2102 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2104 if (!sps->page_shift) {
2108 if ((sps->page_shift > best_page_shift)
2109 && ((1UL << sps->page_shift) <= rampagesize)) {
2110 best_page_shift = sps->page_shift;
2114 return MIN(current_size,
2115 1ULL << (best_page_shift + hash_shift - 7));
2119 bool kvmppc_spapr_use_multitce(void)
2121 return cap_spapr_multitce;
2124 int kvmppc_spapr_enable_inkernel_multitce(void)
2128 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2129 H_PUT_TCE_INDIRECT, 1);
2131 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2138 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2139 uint64_t bus_offset, uint32_t nb_table,
2140 int *pfd, bool need_vfio)
2146 /* Must set fd to -1 so we don't try to munmap when called for
2147 * destroying the table, which the upper layers -will- do
2150 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2154 if (cap_spapr_tce_64) {
2155 struct kvm_create_spapr_tce_64 args = {
2157 .page_shift = page_shift,
2158 .offset = bus_offset >> page_shift,
2162 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2165 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2169 } else if (cap_spapr_tce) {
2170 uint64_t window_size = (uint64_t) nb_table << page_shift;
2171 struct kvm_create_spapr_tce args = {
2173 .window_size = window_size,
2175 if ((window_size != args.window_size) || bus_offset) {
2178 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2180 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2188 len = nb_table * sizeof(uint64_t);
2189 /* FIXME: round this up to page size */
2191 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2192 if (table == MAP_FAILED) {
2193 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2203 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2211 len = nb_table * sizeof(uint64_t);
2212 if ((munmap(table, len) < 0) ||
2214 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2216 /* Leak the table */
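/* Ask the kernel to allocate or reset the guest hash page table. Returns the
 * HPT shift actually provided by KVM, 0 when QEMU must allocate the HPT
 * itself (KVM-PR or full emulation), or a negative errno. */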
2222 int kvmppc_reset_htab(int shift_hint)
2224 uint32_t shift = shift_hint;
2226 if (!kvm_enabled()) {
2227 /* Full emulation, tell caller to allocate htab itself */
2230 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2232 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2233 if (ret == -ENOTTY) {
2234 /* At least some versions of PR KVM advertise the
2235 * capability, but don't implement the ioctl(). Oops.
2236 * Return 0 so that we allocate the htab in qemu, as is
2237 * correct for PR. */
2239 } else if (ret < 0) {
2245 /* We have a kernel that predates the htab reset calls. For PR
2246 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2247 * this era, it has allocated a 16MB fixed size hash table already. */
2248 if (kvmppc_is_pr(kvm_state)) {
2249 /* PR - tell caller to allocate htab */
2252 /* HV - assume 16MB kernel allocated htab */
2257 static inline uint32_t mfpvr(void)
2266 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2275 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2277 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2278 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2279 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2281 /* Now fix up the class with information we can query from the host */
2284 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2285 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2286 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2287 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2288 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2289 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2291 if (dcache_size != -1) {
2292 pcc->l1_dcache_size = dcache_size;
2295 if (icache_size != -1) {
2296 pcc->l1_icache_size = icache_size;
2299 #if defined(TARGET_PPC64)
2300 pcc->radix_page_info = kvm_get_radix_page_info();
2302 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2304 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2305 * compliant. More importantly, advertising ISA 3.00
2306 * architected mode may prevent guests from activating
2307 * necessary DD1 workarounds.
2309 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2310 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2312 #endif /* defined(TARGET_PPC64) */
2315 bool kvmppc_has_cap_epr(void)
2320 bool kvmppc_has_cap_fixup_hcalls(void)
2322 return cap_fixup_hcalls;
2325 bool kvmppc_has_cap_htm(void)
2330 bool kvmppc_has_cap_mmu_radix(void)
2332 return cap_mmu_radix;
2335 bool kvmppc_has_cap_mmu_hash_v3(void)
2337 return cap_mmu_hash_v3;
2340 static bool kvmppc_power8_host(void)
2345 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2346 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2347 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2348 (base_pvr == CPU_POWERPC_POWER8_BASE);
2350 #endif /* TARGET_PPC64 */
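/* The parse_cap_ppc_safe_* helpers below translate the character/behaviour
 * bits reported by KVM_PPC_GET_CPU_CHAR into the spapr_caps levels QEMU uses
 * for the cache flush, bounds check and indirect branch mitigations. */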
2354 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2356 bool l1d_thread_priv_req = !kvmppc_power8_host();
2358 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2360 } else if ((!l1d_thread_priv_req ||
2361 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2362 (c.character & c.character_mask
2363 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2370 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2372 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2374 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2381 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2383 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2384 return SPAPR_CAP_FIXED_CCD;
2385 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2386 return SPAPR_CAP_FIXED_IBS;
2392 static void kvmppc_get_cpu_characteristics(KVMState *s)
2394 struct kvm_ppc_cpu_char c;
2398 cap_ppc_safe_cache = 0;
2399 cap_ppc_safe_bounds_check = 0;
2400 cap_ppc_safe_indirect_branch = 0;
2402 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2406 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2411 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2412 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2413 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2416 int kvmppc_get_cap_safe_cache(void)
2418 return cap_ppc_safe_cache;
2421 int kvmppc_get_cap_safe_bounds_check(void)
2423 return cap_ppc_safe_bounds_check;
2426 int kvmppc_get_cap_safe_indirect_branch(void)
2428 return cap_ppc_safe_indirect_branch;
2431 bool kvmppc_has_cap_nested_kvm_hv(void)
2433 return !!cap_ppc_nested_kvm_hv;
2436 int kvmppc_set_cap_nested_kvm_hv(int enable)
2438 return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2441 bool kvmppc_has_cap_spapr_vfio(void)
2443 return cap_spapr_vfio;
2446 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2448 uint32_t host_pvr = mfpvr();
2449 PowerPCCPUClass *pvr_pcc;
2451 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2452 if (pvr_pcc == NULL) {
2453 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2459 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2461 TypeInfo type_info = {
2462 .name = TYPE_HOST_POWERPC_CPU,
2463 .class_init = kvmppc_host_cpu_class_init,
2465 MachineClass *mc = MACHINE_GET_CLASS(ms);
2466 PowerPCCPUClass *pvr_pcc;
2471 pvr_pcc = kvm_ppc_get_host_cpu_class();
2472 if (pvr_pcc == NULL) {
2475 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2476 type_register(&type_info);
2477 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2478 /* override TCG default cpu type with 'host' cpu model */
2479 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2482 oc = object_class_by_name(type_info.name);
2486 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2487 * we want "POWER8" to be a "family" alias that points to the current
2488 * host CPU type, too)
2490 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2491 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2492 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2495 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2496 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2507 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2509 struct kvm_rtas_token_args args = {
2513 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2517 strncpy(args.name, function, sizeof(args.name));
2519 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2522 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2524 struct kvm_get_htab_fd s = {
2525 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2526 .start_index = index,
2531 error_setg(errp, "KVM version doesn't support %s the HPT",
2532 write ? "writing" : "reading");
2536 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2538 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2539 write ? "writing" : "reading", write ? "to" : "from",
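/* Stream HPT entries from the KVM HTAB fd into the migration stream. Each
 * chunk carries a starting index, a count of valid entries (followed by the
 * raw HPTEs) and a count of invalid entries. Stops once max_ns has elapsed;
 * returns 1 when the fd has no more data, 0 if there may be more to send. */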
2547 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2549 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2550 uint8_t buf[bufsize];
2554 rc = read(fd, buf, bufsize);
2556 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2560 uint8_t *buffer = buf;
2563 struct kvm_get_htab_header *head =
2564 (struct kvm_get_htab_header *) buffer;
2565 size_t chunksize = sizeof(*head) +
2566 HASH_PTE_SIZE_64 * head->n_valid;
2568 qemu_put_be32(f, head->index);
2569 qemu_put_be16(f, head->n_valid);
2570 qemu_put_be16(f, head->n_invalid);
2571 qemu_put_buffer(f, (void *)(head + 1),
2572 HASH_PTE_SIZE_64 * head->n_valid);
2574 buffer += chunksize;
2580 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2582 return (rc == 0) ? 1 : 0;
2585 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2586 uint16_t n_valid, uint16_t n_invalid)
2588 struct kvm_get_htab_header *buf;
2589 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2592 buf = alloca(chunksize);
2594 buf->n_valid = n_valid;
2595 buf->n_invalid = n_invalid;
2597 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2599 rc = write(fd, buf, chunksize);
2601 fprintf(stderr, "Error writing KVM hash table: %s\n",
2605 if (rc != chunksize) {
2606 /* We should never get a short write on a single chunk */
2607 fprintf(stderr, "Short write, restoring KVM hash table\n");
2613 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2618 void kvm_arch_init_irq_routing(KVMState *s)
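/* Read n HPTEs starting at ptex through the HTAB fd, expanding the
 * valid/invalid run-length encoding used by the fd into a flat array. */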
2622 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2627 fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2631 struct kvm_get_htab_header *hdr;
2632 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2633 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2635 rc = read(fd, buf, sizeof(buf));
2637 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2640 hdr = (struct kvm_get_htab_header *)buf;
2641 while ((i < n) && ((char *)hdr < (buf + rc))) {
2642 int invalid = hdr->n_invalid, valid = hdr->n_valid;
2644 if (hdr->index != (ptex + i)) {
2645 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2646 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2649 if (n - i < valid) {
2652 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2655 if ((n - i) < invalid) {
2658 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2661 hdr = (struct kvm_get_htab_header *)
2662 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
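/* Write a single HPTE back through the HTAB fd: one chunk with n_valid = 1,
 * n_invalid = 0 and the two doublewords in big-endian format. */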
2669 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2673 struct kvm_get_htab_header hdr;
2678 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2680 buf.hdr.n_valid = 1;
2681 buf.hdr.n_invalid = 0;
2682 buf.hdr.index = ptex;
2683 buf.pte0 = cpu_to_be64(pte0);
2684 buf.pte1 = cpu_to_be64(pte1);
2686 rc = write(fd, &buf, sizeof(buf));
2687 if (rc != sizeof(buf)) {
2688 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2693 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2694 uint64_t address, uint32_t data, PCIDevice *dev)
2699 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2700 int vector, PCIDevice *dev)
2705 int kvm_arch_release_virq_post(int virq)
2710 int kvm_arch_msi_data_to_gsi(uint32_t data)
2712 return data & 0xffff;
2715 int kvmppc_enable_hwrng(void)
2717 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2721 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2724 void kvmppc_check_papr_resize_hpt(Error **errp)
2726 if (!kvm_enabled()) {
2727 return; /* No KVM, we're good */
2730 if (cap_resize_hpt) {
2731 return; /* Kernel has explicit support, we're good */
2734 /* Otherwise fallback on looking for PR KVM */
2735 if (kvmppc_is_pr(kvm_state)) {
2740 "Hash page table resizing not available with this KVM version");
2743 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2745 CPUState *cs = CPU(cpu);
2746 struct kvm_ppc_resize_hpt rhpt = {
2751 if (!cap_resize_hpt) {
2755 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2758 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2760 CPUState *cs = CPU(cpu);
2761 struct kvm_ppc_resize_hpt rhpt = {
2766 if (!cap_resize_hpt) {
2770 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
* This is a helper function to detect a post-migration scenario
2775 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2776 * the guest kernel can't handle a PVR value other than the actual host
2777 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2779 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2780 * (so, we're HV), return true. The workaround itself is done in
2783 * The order here is important: we'll only check for KVM PR as a
2784 * fallback if the guest kernel can't handle the situation itself.
2785 * We need to avoid as much as possible querying the running KVM type
2788 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2790 CPUState *cs = CPU(cpu);
2792 if (!kvm_enabled()) {
2796 if (cap_ppc_pvr_compat) {
2800 return !kvmppc_is_pr(cs->kvm_state);
2803 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2805 CPUState *cs = CPU(cpu);
2807 if (kvm_enabled()) {
2808 kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);