2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
51 #include "sysemu/kvm_int.h"
56 #define DPRINTF(fmt, ...) \
57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
59 #define DPRINTF(fmt, ...) \
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
80 static int cap_one_reg;
82 static int cap_ppc_watchdog;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
95 static uint32_t debug_inst_opcode;
97 /* XXX We have a race condition where we actually have a level triggered
98 * interrupt, but the infrastructure can't expose that yet, so the guest
99 * takes but ignores it, goes to sleep and never gets notified that there's
100 * still an interrupt pending.
102 * As a quick workaround, let's just wake up again 20 ms after we injected
103 * an interrupt. That way we can ensure that we're always reinjecting
104 * interrupts in case the guest swallowed them.
106 static QEMUTimer *idle_timer;
108 static void kvm_kick_cpu(void *opaque)
110 PowerPCCPU *cpu = opaque;
112 qemu_cpu_kick(CPU(cpu));
115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
116 * should only be used for fallback tests - generally we should use
117 * explicit capabilities for the features we want, rather than
118 * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
121 /* Assume KVM-PR if the GET_PVINFO capability is available */
122 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
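/*
 * Illustrative sketch (not part of the original file): callers in this file
 * use kvmppc_is_pr() only as a fallback heuristic when a dedicated
 * capability is missing, e.g. in kvm_arch_init_vcpu() further down:
 *
 *     if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 *         // assume KVM-HV and probe the host's HTM feature directly
 *     }
 */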
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143 /* Note: we don't set cap_papr here, because this capability is
144 * only activated after this by kvmppc_set_papr() */
145 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152 kvmppc_get_cpu_characteristics(s);
154 * Note: setting it to false because there is no such capability
155 * in KVM at this moment.
157 * TODO: call kvm_vm_check_extension() with the right capability
158 * after the kernel starts implementing it. */
159 cap_ppc_pvr_compat = false;
161 if (!cap_interrupt_level) {
162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163 "VM to stall at times!\n");
166 kvm_ppc_register_host_cpu_type(ms);
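/*
 * Sketch (assumption, for illustration): the capability flags cached above
 * simply gate later operations instead of re-probing KVM each time, e.g.
 * kvmppc_booke_watchdog_enable() below bails out early:
 *
 *     if (!cap_ppc_watchdog) {
 *         return -1;   // hypothetical return value; see the real function below
 *     }
 */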
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 CPUPPCState *cenv = &cpu->env;
179 CPUState *cs = CPU(cpu);
180 struct kvm_sregs sregs;
183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184 /* What we're really trying to say is "if we're on BookE, we use
185 the native PVR for now". This is the only sane way to check
186 it though, so we may mislead users into thinking they can run
187 BookE guests on BookS. Let's hope nobody dares to try :) */
191 fprintf(stderr, "kvm error: missing PVR setting capability\n");
196 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
201 sregs.pvr = cenv->spr[SPR_PVR];
202 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
210 struct kvm_book3e_206_tlb_params params = {};
211 struct kvm_config_tlb cfg = {};
212 unsigned int entries = 0;
215 if (!kvm_enabled() ||
216 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
220 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223 params.tlb_sizes[i] = booke206_tlb_size(env, i);
224 params.tlb_ways[i] = booke206_tlb_ways(env, i);
225 entries += params.tlb_sizes[i];
228 assert(entries == env->nb_tlb);
229 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 env->tlb_dirty = true;
233 cfg.array = (uintptr_t)env->tlb.tlbm;
234 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235 cfg.params = (uintptr_t)&params;
236 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
240 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241 __func__, strerror(-ret));
245 env->kvm_sw_tlb = true;
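/*
 * Illustrative note (not in the original file): the array registered with
 * KVM_CAP_SW_TLB above is written back to the kernel by kvm_sw_tlb_put()
 * further down, driven from kvm_arch_put_registers():
 *
 *     if (env->tlb_dirty) {
 *         kvm_sw_tlb_put(cpu);
 *         env->tlb_dirty = false;
 *     }
 */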
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252 struct kvm_ppc_smmu_info *info)
254 CPUPPCState *env = &cpu->env;
255 CPUState *cs = CPU(cpu);
257 memset(info, 0, sizeof(*info));
259 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
260 * need to "guess" what the supported page sizes are.
262 * For that to work we make a few assumptions:
264 * - Check whether we are running "PR" KVM which only supports 4K
265 * and 16M pages, but supports them regardless of the backing
266 * store characteristics. We also don't support 1T segments.
268 * This is safe as if HV KVM ever supports that capability or PR
269 * KVM grows support for more page/segment sizes, those versions
270 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271 * will not hit this fallback
273 * - Else we are running HV KVM. This means we only support page
274 * sizes that fit in the backing store. Additionally we only
275 * advertise 64K pages if the processor is ARCH 2.06 and we assume
276 * P7 encodings for the SLB and hash table. Here too, we assume
277 * support for any newer processor will mean a kernel that
278 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
281 if (kvmppc_is_pr(cs->kvm_state)) {
286 /* Standard 4k base page size segment */
287 info->sps[0].page_shift = 12;
288 info->sps[0].slb_enc = 0;
289 info->sps[0].enc[0].page_shift = 12;
290 info->sps[0].enc[0].pte_enc = 0;
292 /* Standard 16M large page size segment */
293 info->sps[1].page_shift = 24;
294 info->sps[1].slb_enc = SLB_VSID_L;
295 info->sps[1].enc[0].page_shift = 24;
296 info->sps[1].enc[0].pte_enc = 0;
300 /* HV KVM has backing store size restrictions */
301 info->flags = KVM_PPC_PAGE_SIZES_REAL;
303 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
304 info->flags |= KVM_PPC_1T_SEGMENTS;
307 if (env->mmu_model == POWERPC_MMU_2_06 ||
308 env->mmu_model == POWERPC_MMU_2_07) {
314 /* Standard 4k base page size segment */
315 info->sps[i].page_shift = 12;
316 info->sps[i].slb_enc = 0;
317 info->sps[i].enc[0].page_shift = 12;
318 info->sps[i].enc[0].pte_enc = 0;
321 /* 64K on MMU 2.06 and later */
322 if (env->mmu_model == POWERPC_MMU_2_06 ||
323 env->mmu_model == POWERPC_MMU_2_07) {
324 info->sps[i].page_shift = 16;
325 info->sps[i].slb_enc = 0x110;
326 info->sps[i].enc[0].page_shift = 16;
327 info->sps[i].enc[0].pte_enc = 1;
331 /* Standard 16M large page size segment */
332 info->sps[i].page_shift = 24;
333 info->sps[i].slb_enc = SLB_VSID_L;
334 info->sps[i].enc[0].page_shift = 24;
335 info->sps[i].enc[0].pte_enc = 0;
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
341 CPUState *cs = CPU(cpu);
344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
351 kvm_get_fallback_smmu_info(cpu, info);
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
356 KVMState *s = KVM_STATE(current_machine->accelerator);
357 struct ppc_radix_page_info *radix_page_info;
358 struct kvm_ppc_rmmu_info rmmu_info;
361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
367 radix_page_info = g_malloc0(sizeof(*radix_page_info));
368 radix_page_info->count = 0;
369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370 if (rmmu_info.ap_encodings[i]) {
371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372 radix_page_info->count++;
375 return radix_page_info;
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379 bool radix, bool gtse,
382 CPUState *cs = CPU(cpu);
385 struct kvm_ppc_mmuv3_cfg cfg = {
386 .process_table = proc_tbl,
390 flags |= KVM_PPC_MMUV3_RADIX;
393 flags |= KVM_PPC_MMUV3_GTSE;
396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
403 return H_NOT_AVAILABLE;
409 bool kvmppc_hpt_needs_host_contiguous_pages(void)
411 PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
412 static struct kvm_ppc_smmu_info smmu_info;
414 if (!kvm_enabled()) {
418 kvm_get_smmu_info(cpu, &smmu_info);
419 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
422 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
424 if (!kvmppc_hpt_needs_host_contiguous_pages()) {
428 return (1ul << shift) <= rampgsize;
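/*
 * Worked example (illustration only): with KVM-HV backed by 64 KiB RAM pages
 * (rampgsize = 0x10000), a 64 KiB guest page (shift 16) passes the check
 * because (1ul << 16) <= 0x10000, while a 16 MiB page (shift 24) is rejected.
 * Without the host-contiguity requirement the function accepts any shift.
 */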
431 static long max_cpu_page_size;
433 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
435 static struct kvm_ppc_smmu_info smmu_info;
436 static bool has_smmu_info;
437 CPUPPCState *env = &cpu->env;
440 /* We only handle page sizes for 64-bit server guests for now */
441 if (!(env->mmu_model & POWERPC_MMU_64)) {
445 /* Collect MMU info from kernel if not already */
446 if (!has_smmu_info) {
447 kvm_get_smmu_info(cpu, &smmu_info);
448 has_smmu_info = true;
451 if (!max_cpu_page_size) {
452 max_cpu_page_size = qemu_getrampagesize();
455 /* Convert to QEMU form */
456 memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps));
458 /* If we have HV KVM, we need to forbid CI large pages if our
459 * host page size is smaller than 64K.
461 if (kvmppc_hpt_needs_host_contiguous_pages()) {
462 if (getpagesize() >= 0x10000) {
463 cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE;
465 cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
470 * XXX This loop should be an entry wide AND of the capabilities that
471 * the selected CPU has with the capabilities that KVM supports.
473 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
474 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
475 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
477 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
481 qsps->page_shift = ksps->page_shift;
482 qsps->slb_enc = ksps->slb_enc;
483 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
484 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
485 ksps->enc[jk].page_shift)) {
488 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
489 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
490 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
494 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
498 cpu->hash64_opts->slb_size = smmu_info.slb_size;
499 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
500 cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG;
503 #else /* defined (TARGET_PPC64) */
505 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
509 #endif /* !defined (TARGET_PPC64) */
511 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
513 return POWERPC_CPU(cpu)->vcpu_id;
516 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
517 * book3s supports only 1 watchpoint, so array size
518 * of 4 is sufficient for now.
520 #define MAX_HW_BKPTS 4
522 static struct HWBreakpoint {
525 } hw_debug_points[MAX_HW_BKPTS];
527 static CPUWatchpoint hw_watchpoint;
529 /* Default there is no breakpoint and watchpoint supported */
530 static int max_hw_breakpoint;
531 static int max_hw_watchpoint;
532 static int nb_hw_breakpoint;
533 static int nb_hw_watchpoint;
535 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
537 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
538 max_hw_breakpoint = 2;
539 max_hw_watchpoint = 2;
542 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
543 fprintf(stderr, "Error initializing h/w breakpoints\n");
548 int kvm_arch_init_vcpu(CPUState *cs)
550 PowerPCCPU *cpu = POWERPC_CPU(cs);
551 CPUPPCState *cenv = &cpu->env;
554 /* Gather server mmu info from KVM and update the CPU state */
555 kvm_fixup_page_sizes(cpu);
557 /* Synchronize sregs with kvm */
558 ret = kvm_arch_sync_sregs(cpu);
560 if (ret == -EINVAL) {
561 error_report("Register sync failed... If you're using kvm-hv.ko,"
562 " only \"-cpu host\" is possible");
567 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
569 switch (cenv->mmu_model) {
570 case POWERPC_MMU_BOOKE206:
571 /* This target supports access to KVM's guest TLB */
572 ret = kvm_booke206_tlb_init(cpu);
574 case POWERPC_MMU_2_07:
575 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
576 /* KVM-HV has transactional memory on POWER8 also without the
577 * KVM_CAP_PPC_HTM extension, so enable it here instead as
578 * long as it's available to userspace on the host. */
579 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
588 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
589 kvmppc_hw_debug_points_init(cenv);
594 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
596 CPUPPCState *env = &cpu->env;
597 CPUState *cs = CPU(cpu);
598 struct kvm_dirty_tlb dirty_tlb;
599 unsigned char *bitmap;
602 if (!env->kvm_sw_tlb) {
606 bitmap = g_malloc((env->nb_tlb + 7) / 8);
607 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
609 dirty_tlb.bitmap = (uintptr_t)bitmap;
610 dirty_tlb.num_dirty = env->nb_tlb;
612 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
614 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
615 __func__, strerror(-ret));
621 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
623 PowerPCCPU *cpu = POWERPC_CPU(cs);
624 CPUPPCState *env = &cpu->env;
629 struct kvm_one_reg reg = {
631 .addr = (uintptr_t) &val,
635 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
637 trace_kvm_failed_spr_get(spr, strerror(errno));
639 switch (id & KVM_REG_SIZE_MASK) {
640 case KVM_REG_SIZE_U32:
641 env->spr[spr] = val.u32;
644 case KVM_REG_SIZE_U64:
645 env->spr[spr] = val.u64;
649 /* Don't handle this size yet */
655 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
657 PowerPCCPU *cpu = POWERPC_CPU(cs);
658 CPUPPCState *env = &cpu->env;
663 struct kvm_one_reg reg = {
665 .addr = (uintptr_t) &val,
669 switch (id & KVM_REG_SIZE_MASK) {
670 case KVM_REG_SIZE_U32:
671 val.u32 = env->spr[spr];
674 case KVM_REG_SIZE_U64:
675 val.u64 = env->spr[spr];
679 /* Don't handle this size yet */
683 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
685 trace_kvm_failed_spr_set(spr, strerror(errno));
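/*
 * Usage sketch (assumption, mirroring the generic SPR loops later in this
 * file): SPRs that have a ONE_REG id registered in spr_cb are synced with
 *
 *     for (i = 0; i < 1024; i++) {
 *         uint64_t id = env->spr_cb[i].one_reg_id;
 *         if (id != 0) {
 *             kvm_put_one_spr(cs, id, i);   // or kvm_get_one_spr() on the read path
 *         }
 *     }
 */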
689 static int kvm_put_fp(CPUState *cs)
691 PowerPCCPU *cpu = POWERPC_CPU(cs);
692 CPUPPCState *env = &cpu->env;
693 struct kvm_one_reg reg;
697 if (env->insns_flags & PPC_FLOAT) {
698 uint64_t fpscr = env->fpscr;
699 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
701 reg.id = KVM_REG_PPC_FPSCR;
702 reg.addr = (uintptr_t)&fpscr;
703 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
705 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
709 for (i = 0; i < 32; i++) {
712 #ifdef HOST_WORDS_BIGENDIAN
713 vsr[0] = float64_val(env->fpr[i]);
714 vsr[1] = env->vsr[i];
716 vsr[0] = env->vsr[i];
717 vsr[1] = float64_val(env->fpr[i]);
719 reg.addr = (uintptr_t) &vsr;
720 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
722 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
724 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
731 if (env->insns_flags & PPC_ALTIVEC) {
732 reg.id = KVM_REG_PPC_VSCR;
733 reg.addr = (uintptr_t)&env->vscr;
734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
736 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
740 for (i = 0; i < 32; i++) {
741 reg.id = KVM_REG_PPC_VR(i);
742 reg.addr = (uintptr_t)&env->avr[i];
743 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
754 static int kvm_get_fp(CPUState *cs)
756 PowerPCCPU *cpu = POWERPC_CPU(cs);
757 CPUPPCState *env = &cpu->env;
758 struct kvm_one_reg reg;
762 if (env->insns_flags & PPC_FLOAT) {
764 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
766 reg.id = KVM_REG_PPC_FPSCR;
767 reg.addr = (uintptr_t)&fpscr;
768 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
776 for (i = 0; i < 32; i++) {
779 reg.addr = (uintptr_t) &vsr;
780 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
782 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784 DPRINTF("Unable to get %s%d from KVM: %s\n",
785 vsx ? "VSR" : "FPR", i, strerror(errno));
788 #ifdef HOST_WORDS_BIGENDIAN
789 env->fpr[i] = vsr[0];
791 env->vsr[i] = vsr[1];
794 env->fpr[i] = vsr[1];
796 env->vsr[i] = vsr[0];
803 if (env->insns_flags & PPC_ALTIVEC) {
804 reg.id = KVM_REG_PPC_VSCR;
805 reg.addr = (uintptr_t)&env->vscr;
806 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
808 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
812 for (i = 0; i < 32; i++) {
813 reg.id = KVM_REG_PPC_VR(i);
814 reg.addr = (uintptr_t)&env->avr[i];
815 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817 DPRINTF("Unable to get VR%d from KVM: %s\n",
827 #if defined(TARGET_PPC64)
828 static int kvm_get_vpa(CPUState *cs)
830 PowerPCCPU *cpu = POWERPC_CPU(cs);
831 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
832 struct kvm_one_reg reg;
835 reg.id = KVM_REG_PPC_VPA_ADDR;
836 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
837 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
839 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
843 assert((uintptr_t)&spapr_cpu->slb_shadow_size
844 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
845 reg.id = KVM_REG_PPC_VPA_SLB;
846 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
847 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
849 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
854 assert((uintptr_t)&spapr_cpu->dtl_size
855 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
856 reg.id = KVM_REG_PPC_VPA_DTL;
857 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
858 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
868 static int kvm_put_vpa(CPUState *cs)
870 PowerPCCPU *cpu = POWERPC_CPU(cs);
871 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
872 struct kvm_one_reg reg;
875 /* SLB shadow or DTL can't be registered unless a master VPA is
876 * registered. That means when restoring state, if a VPA *is*
877 * registered, we need to set that up first. If not, we need to
878 * deregister the others before deregistering the master VPA */
879 assert(spapr_cpu->vpa_addr
880 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
882 if (spapr_cpu->vpa_addr) {
883 reg.id = KVM_REG_PPC_VPA_ADDR;
884 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
885 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
887 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
892 assert((uintptr_t)&spapr_cpu->slb_shadow_size
893 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
894 reg.id = KVM_REG_PPC_VPA_SLB;
895 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
896 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
898 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
902 assert((uintptr_t)&spapr_cpu->dtl_size
903 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
904 reg.id = KVM_REG_PPC_VPA_DTL;
905 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
906 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
908 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
913 if (!spapr_cpu->vpa_addr) {
914 reg.id = KVM_REG_PPC_VPA_ADDR;
915 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
916 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
918 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
925 #endif /* TARGET_PPC64 */
927 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
929 CPUPPCState *env = &cpu->env;
930 struct kvm_sregs sregs;
933 sregs.pvr = env->spr[SPR_PVR];
936 PPCVirtualHypervisorClass *vhc =
937 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
938 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
940 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
945 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
946 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
947 if (env->slb[i].esid & SLB_ESID_V) {
948 sregs.u.s.ppc64.slb[i].slbe |= i;
950 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
955 for (i = 0; i < 16; i++) {
956 sregs.u.s.ppc32.sr[i] = env->sr[i];
960 for (i = 0; i < 8; i++) {
961 /* Beware. We have to swap upper and lower bits here */
962 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
964 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
968 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
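/*
 * Illustrative note (not in the original file): for 64-bit server guests the
 * packed SLB entry handed to KVM carries the entry index in the low bits of
 * the ESID word, but only for valid entries, i.e. roughly:
 *
 *     slbe = esid;
 *     if (esid & SLB_ESID_V) {
 *         slbe |= i;        // entry number, as done in the loop above
 *     }
 *     slbv = vsid;
 */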
971 int kvm_arch_put_registers(CPUState *cs, int level)
973 PowerPCCPU *cpu = POWERPC_CPU(cs);
974 CPUPPCState *env = &cpu->env;
975 struct kvm_regs regs;
979 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
986 regs.xer = cpu_read_xer(env);
990 regs.srr0 = env->spr[SPR_SRR0];
991 regs.srr1 = env->spr[SPR_SRR1];
993 regs.sprg0 = env->spr[SPR_SPRG0];
994 regs.sprg1 = env->spr[SPR_SPRG1];
995 regs.sprg2 = env->spr[SPR_SPRG2];
996 regs.sprg3 = env->spr[SPR_SPRG3];
997 regs.sprg4 = env->spr[SPR_SPRG4];
998 regs.sprg5 = env->spr[SPR_SPRG5];
999 regs.sprg6 = env->spr[SPR_SPRG6];
1000 regs.sprg7 = env->spr[SPR_SPRG7];
1002 regs.pid = env->spr[SPR_BOOKE_PID];
1004 for (i = 0; i < 32; i++)
1005 regs.gpr[i] = env->gpr[i];
1008 for (i = 0; i < 8; i++) {
1009 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1012 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1018 if (env->tlb_dirty) {
1019 kvm_sw_tlb_put(cpu);
1020 env->tlb_dirty = false;
1023 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1024 ret = kvmppc_put_books_sregs(cpu);
1030 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1031 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1037 /* We deliberately ignore errors here: for kernels which have
1038 * the ONE_REG calls but don't support the specific
1039 * registers, there's a reasonable chance things will still
1040 * work, at least until we try to migrate. */
1041 for (i = 0; i < 1024; i++) {
1042 uint64_t id = env->spr_cb[i].one_reg_id;
1045 kvm_put_one_spr(cs, id, i);
1051 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1054 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1070 if (kvm_put_vpa(cs) < 0) {
1071 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1075 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1076 #endif /* TARGET_PPC64 */
1082 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1084 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
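/*
 * Worked example (illustration only): on BookE the effective exception
 * vector is the IVPR base plus the per-exception IVOR offset, so for the
 * external-interrupt vector synced below this amounts to
 *
 *     env->excp_vectors[POWERPC_EXCP_EXTERNAL] =
 *         env->spr[SPR_BOOKE_IVOR4] + env->spr[SPR_BOOKE_IVPR];
 */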
1087 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1089 CPUPPCState *env = &cpu->env;
1090 struct kvm_sregs sregs;
1093 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1098 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1099 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1100 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1101 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1102 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1103 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1104 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1105 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1106 env->spr[SPR_DECR] = sregs.u.e.dec;
1107 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1108 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1109 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1112 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1113 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1114 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1115 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1116 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1117 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1120 if (sregs.u.e.features & KVM_SREGS_E_64) {
1121 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1124 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1125 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1128 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1129 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1130 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1131 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1132 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1133 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1134 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1135 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1136 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1137 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1138 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1139 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1140 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1141 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1142 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1143 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1144 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1145 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1146 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1147 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1148 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1149 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1150 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1151 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1152 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1153 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1154 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1155 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1156 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1157 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1158 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1159 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1160 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1162 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1163 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1164 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1165 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1166 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1167 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1168 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1171 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1172 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1173 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1176 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1177 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1178 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1179 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1180 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1184 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1185 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1186 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1187 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1188 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1189 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1190 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1191 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1192 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1193 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1194 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1197 if (sregs.u.e.features & KVM_SREGS_EXP) {
1198 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1201 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1202 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1203 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1206 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1207 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1208 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1209 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1211 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1212 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1213 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1220 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1222 CPUPPCState *env = &cpu->env;
1223 struct kvm_sregs sregs;
1227 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1233 ppc_store_sdr1(env, sregs.u.s.sdr1);
1239 * The packed SLB array we get from KVM_GET_SREGS only contains
1240 * information about valid entries. So we flush our internal copy
1241 * to get rid of stale ones, then put all valid SLB entries back
1244 memset(env->slb, 0, sizeof(env->slb));
1245 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1246 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1247 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1249 * Only restore valid entries
1251 if (rb & SLB_ESID_V) {
1252 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1258 for (i = 0; i < 16; i++) {
1259 env->sr[i] = sregs.u.s.ppc32.sr[i];
1263 for (i = 0; i < 8; i++) {
1264 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1265 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1266 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1267 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1273 int kvm_arch_get_registers(CPUState *cs)
1275 PowerPCCPU *cpu = POWERPC_CPU(cs);
1276 CPUPPCState *env = &cpu->env;
1277 struct kvm_regs regs;
1281 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1286 for (i = 7; i >= 0; i--) {
1287 env->crf[i] = cr & 15;
1291 env->ctr = regs.ctr;
1293 cpu_write_xer(env, regs.xer);
1294 env->msr = regs.msr;
1297 env->spr[SPR_SRR0] = regs.srr0;
1298 env->spr[SPR_SRR1] = regs.srr1;
1300 env->spr[SPR_SPRG0] = regs.sprg0;
1301 env->spr[SPR_SPRG1] = regs.sprg1;
1302 env->spr[SPR_SPRG2] = regs.sprg2;
1303 env->spr[SPR_SPRG3] = regs.sprg3;
1304 env->spr[SPR_SPRG4] = regs.sprg4;
1305 env->spr[SPR_SPRG5] = regs.sprg5;
1306 env->spr[SPR_SPRG6] = regs.sprg6;
1307 env->spr[SPR_SPRG7] = regs.sprg7;
1309 env->spr[SPR_BOOKE_PID] = regs.pid;
1311 for (i = 0; i < 32; i++)
1312 env->gpr[i] = regs.gpr[i];
1316 if (cap_booke_sregs) {
1317 ret = kvmppc_get_booke_sregs(cpu);
1324 ret = kvmppc_get_books_sregs(cpu);
1331 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1337 /* We deliberately ignore errors here: for kernels which have
1338 * the ONE_REG calls but don't support the specific
1339 * registers, there's a reasonable chance things will still
1340 * work, at least until we try to migrate. */
1341 for (i = 0; i < 1024; i++) {
1342 uint64_t id = env->spr_cb[i].one_reg_id;
1345 kvm_get_one_spr(cs, id, i);
1351 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1354 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1370 if (kvm_get_vpa(cs) < 0) {
1371 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1375 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1382 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1384 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1386 if (irq != PPC_INTERRUPT_EXT) {
1390 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1394 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1399 #if defined(TARGET_PPCEMB)
1400 #define PPC_INPUT_INT PPC40x_INPUT_INT
1401 #elif defined(TARGET_PPC64)
1402 #define PPC_INPUT_INT PPC970_INPUT_INT
1404 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1407 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1409 PowerPCCPU *cpu = POWERPC_CPU(cs);
1410 CPUPPCState *env = &cpu->env;
1414 qemu_mutex_lock_iothread();
1416 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1417 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1418 if (!cap_interrupt_level &&
1419 run->ready_for_interrupt_injection &&
1420 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1421 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1423 /* For now KVM disregards the 'irq' argument. However, in the
1424 * future KVM could cache it in-kernel to avoid a heavyweight exit
1425 * when reading the UIC.
1427 irq = KVM_INTERRUPT_SET;
1429 DPRINTF("injected interrupt %d\n", irq);
1430 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1432 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1435 /* Always wake up soon in case the interrupt was level based */
1436 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1437 (NANOSECONDS_PER_SECOND / 50));
1440 /* We don't know if there are more interrupts pending after this. However,
1441 * the guest will return to userspace in the course of handling this one
1442 * anyways, so we will get a chance to deliver the rest. */
1444 qemu_mutex_unlock_iothread();
1447 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1449 return MEMTXATTRS_UNSPECIFIED;
1452 int kvm_arch_process_async_events(CPUState *cs)
1457 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1459 CPUState *cs = CPU(cpu);
1460 CPUPPCState *env = &cpu->env;
1462 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1464 cs->exception_index = EXCP_HLT;
1470 /* map dcr access to existing qemu dcr emulation */
1471 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1473 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1474 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1479 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1481 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1482 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1487 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1489 /* Mixed endian case is not handled */
1490 uint32_t sc = debug_inst_opcode;
1492 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1494 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1501 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1505 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1506 sc != debug_inst_opcode ||
1507 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1515 static int find_hw_breakpoint(target_ulong addr, int type)
1519 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1520 <= ARRAY_SIZE(hw_debug_points));
1522 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1523 if (hw_debug_points[n].addr == addr &&
1524 hw_debug_points[n].type == type) {
1532 static int find_hw_watchpoint(target_ulong addr, int *flag)
1536 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1538 *flag = BP_MEM_ACCESS;
1542 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1544 *flag = BP_MEM_WRITE;
1548 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1550 *flag = BP_MEM_READ;
1557 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1558 target_ulong len, int type)
1560 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1564 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1565 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1568 case GDB_BREAKPOINT_HW:
1569 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1573 if (find_hw_breakpoint(addr, type) >= 0) {
1580 case GDB_WATCHPOINT_WRITE:
1581 case GDB_WATCHPOINT_READ:
1582 case GDB_WATCHPOINT_ACCESS:
1583 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1587 if (find_hw_breakpoint(addr, type) >= 0) {
1601 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1602 target_ulong len, int type)
1606 n = find_hw_breakpoint(addr, type);
1612 case GDB_BREAKPOINT_HW:
1616 case GDB_WATCHPOINT_WRITE:
1617 case GDB_WATCHPOINT_READ:
1618 case GDB_WATCHPOINT_ACCESS:
1625 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1630 void kvm_arch_remove_all_hw_breakpoints(void)
1632 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1635 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1639 /* Software Breakpoint updates */
1640 if (kvm_sw_breakpoints_active(cs)) {
1641 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1644 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1645 <= ARRAY_SIZE(hw_debug_points));
1646 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1648 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1649 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1650 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1651 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1652 switch (hw_debug_points[n].type) {
1653 case GDB_BREAKPOINT_HW:
1654 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1656 case GDB_WATCHPOINT_WRITE:
1657 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1659 case GDB_WATCHPOINT_READ:
1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1662 case GDB_WATCHPOINT_ACCESS:
1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1664 KVMPPC_DEBUG_WATCH_READ;
1667 cpu_abort(cs, "Unsupported breakpoint type\n");
1669 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1674 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1676 CPUState *cs = CPU(cpu);
1677 CPUPPCState *env = &cpu->env;
1678 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1683 if (cs->singlestep_enabled) {
1685 } else if (arch_info->status) {
1686 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1687 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1688 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1692 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1693 KVMPPC_DEBUG_WATCH_WRITE)) {
1694 n = find_hw_watchpoint(arch_info->address, &flag);
1697 cs->watchpoint_hit = &hw_watchpoint;
1698 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1699 hw_watchpoint.flags = flag;
1703 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1706 /* QEMU cannot handle this debug exception itself, so inject a
1707 * program exception into the guest instead;
1708 * yes, a program exception, NOT a debug exception !!
1709 * When QEMU is using the debug resources, debug exceptions must
1710 * always be enabled. To achieve this we set MSR_DE and also set
1711 * MSRP_DEP so the guest cannot change MSR_DE.
1712 * When emulating debug resources for the guest, we want the guest
1713 * to control MSR_DE (enable/disable the debug interrupt on demand).
1714 * Supporting both configurations at once is NOT possible.
1715 * The result is that we cannot share debug resources
1716 * between QEMU and the guest on the BookE architecture.
1717 * In the current design QEMU gets priority over the guest,
1718 * which means that if QEMU is using the debug resources then the guest
1719 * cannot use them at the same time.
1720 * For software breakpoints QEMU uses a privileged instruction,
1721 * so there is no way we could be here because the guest set a
1722 * debug exception; the only possibility is that the guest executed
1723 * a privileged / illegal instruction, which is why we are
1724 * injecting a program interrupt.
1727 cpu_synchronize_state(cs);
1728 /* env->nip is PC, so increment this by 4 to use
1729 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1732 cs->exception_index = POWERPC_EXCP_PROGRAM;
1733 env->error_code = POWERPC_EXCP_INVAL;
1734 ppc_cpu_do_interrupt(cs);
1740 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1742 PowerPCCPU *cpu = POWERPC_CPU(cs);
1743 CPUPPCState *env = &cpu->env;
1746 qemu_mutex_lock_iothread();
1748 switch (run->exit_reason) {
1750 if (run->dcr.is_write) {
1751 DPRINTF("handle dcr write\n");
1752 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1754 DPRINTF("handle dcr read\n");
1755 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1759 DPRINTF("handle halt\n");
1760 ret = kvmppc_handle_halt(cpu);
1762 #if defined(TARGET_PPC64)
1763 case KVM_EXIT_PAPR_HCALL:
1764 DPRINTF("handle PAPR hypercall\n");
1765 run->papr_hcall.ret = spapr_hypercall(cpu,
1767 run->papr_hcall.args);
1772 DPRINTF("handle epr\n");
1773 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1776 case KVM_EXIT_WATCHDOG:
1777 DPRINTF("handle watchdog expiry\n");
1778 watchdog_perform_action();
1782 case KVM_EXIT_DEBUG:
1783 DPRINTF("handle debug exception\n");
1784 if (kvm_handle_debug(cpu, run)) {
1788 /* re-enter, this exception was guest-internal */
1793 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1798 qemu_mutex_unlock_iothread();
1802 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1804 CPUState *cs = CPU(cpu);
1805 uint32_t bits = tsr_bits;
1806 struct kvm_one_reg reg = {
1807 .id = KVM_REG_PPC_OR_TSR,
1808 .addr = (uintptr_t) &bits,
1811 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1814 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1817 CPUState *cs = CPU(cpu);
1818 uint32_t bits = tsr_bits;
1819 struct kvm_one_reg reg = {
1820 .id = KVM_REG_PPC_CLEAR_TSR,
1821 .addr = (uintptr_t) &bits,
1824 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1827 int kvmppc_set_tcr(PowerPCCPU *cpu)
1829 CPUState *cs = CPU(cpu);
1830 CPUPPCState *env = &cpu->env;
1831 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1833 struct kvm_one_reg reg = {
1834 .id = KVM_REG_PPC_TCR,
1835 .addr = (uintptr_t) &tcr,
1838 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1841 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1843 CPUState *cs = CPU(cpu);
1846 if (!kvm_enabled()) {
1850 if (!cap_ppc_watchdog) {
1851 printf("warning: KVM does not support watchdog\n");
1855 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1857 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1858 __func__, strerror(-ret));
1865 static int read_cpuinfo(const char *field, char *value, int len)
1869 int field_len = strlen(field);
1872 f = fopen("/proc/cpuinfo", "r");
1878 if (!fgets(line, sizeof(line), f)) {
1881 if (!strncmp(line, field, field_len)) {
1882 pstrcpy(value, len, line);
1893 uint32_t kvmppc_get_tbfreq(void)
1897 uint32_t retval = NANOSECONDS_PER_SECOND;
1899 if (read_cpuinfo("timebase", line, sizeof(line))) {
1903 if (!(ns = strchr(line, ':'))) {
1912 bool kvmppc_get_host_serial(char **value)
1914 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1918 bool kvmppc_get_host_model(char **value)
1920 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1923 /* Try to find a device tree node for a CPU with clock-frequency property */
1924 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1926 struct dirent *dirp;
1929 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1930 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1935 while ((dirp = readdir(dp)) != NULL) {
1937 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1939 f = fopen(buf, "r");
1941 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1948 if (buf[0] == '\0') {
1949 printf("Unknown host!\n");
1956 static uint64_t kvmppc_read_int_dt(const char *filename)
1965 f = fopen(filename, "rb");
1970 len = fread(&u, 1, sizeof(u), f);
1974 /* property is a 32-bit quantity */
1975 return be32_to_cpu(u.v32);
1977 return be64_to_cpu(u.v64);
1983 /* Read a CPU node property from the host device tree that's a single
1984 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1985 * (can't find or open the property, or doesn't understand the
1987 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1989 char buf[PATH_MAX], *tmp;
1992 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1996 tmp = g_strdup_printf("%s/%s", buf, propname);
1997 val = kvmppc_read_int_dt(tmp);
2003 uint64_t kvmppc_get_clockfreq(void)
2005 return kvmppc_read_int_cpu_dt("clock-frequency");
2008 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2010 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2011 CPUState *cs = CPU(cpu);
2013 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2014 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2021 int kvmppc_get_hasidle(CPUPPCState *env)
2023 struct kvm_ppc_pvinfo pvinfo;
2025 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2026 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2033 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2035 uint32_t *hc = (uint32_t*)buf;
2036 struct kvm_ppc_pvinfo pvinfo;
2038 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2039 memcpy(buf, pvinfo.hcall, buf_len);
2044 * Fallback to always fail hypercalls regardless of endianness:
2046 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2048 * b .+8 (becomes nop in wrong endian)
2049 * bswap32(li r3, -1)
2052 hc[0] = cpu_to_be32(0x08000048);
2053 hc[1] = cpu_to_be32(0x3860ffff);
2054 hc[2] = cpu_to_be32(0x48000008);
2055 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
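/*
 * Illustrative decode (assumption, matching the comment above): read in the
 * "wrong" endianness the first word (0x08000048) byte-swaps to 0x48000008,
 * i.e. "b .+8", so in either endianness the guest ends up executing the
 * "li r3, -1" (0x3860ffff) and the would-be hypercall simply returns failure.
 */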
2060 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2062 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2065 void kvmppc_enable_logical_ci_hcalls(void)
2068 * FIXME: it would be nice if we could detect the cases where
2069 * we're using a device which requires the in kernel
2070 * implementation of these hcalls, but the kernel lacks them and
2071 * produce a warning.
2073 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2074 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2077 void kvmppc_enable_set_mode_hcall(void)
2079 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2082 void kvmppc_enable_clear_ref_mod_hcalls(void)
2084 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2085 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2088 void kvmppc_set_papr(PowerPCCPU *cpu)
2090 CPUState *cs = CPU(cpu);
2093 if (!kvm_enabled()) {
2097 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2099 error_report("This vCPU type or KVM version does not support PAPR");
2103 /* Update the capability flag so we sync the right information
2108 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2110 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2113 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2115 CPUState *cs = CPU(cpu);
2118 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2119 if (ret && mpic_proxy) {
2120 error_report("This KVM version does not support EPR");
2125 int kvmppc_smt_threads(void)
2127 return cap_ppc_smt ? cap_ppc_smt : 1;
2130 int kvmppc_set_smt_threads(int smt)
2134 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2141 void kvmppc_hint_smt_possible(Error **errp)
2147 assert(kvm_enabled());
2148 if (cap_ppc_smt_possible) {
2149 g = g_string_new("Available VSMT modes:");
2150 for (i = 63; i >= 0; i--) {
2151 if ((1UL << i) & cap_ppc_smt_possible) {
2152 g_string_append_printf(g, " %lu", (1UL << i));
2155 s = g_string_free(g, false);
2156 error_append_hint(errp, "%s.\n", s);
2159 error_append_hint(errp,
2160 "This KVM seems to be too old to support VSMT.\n");
2166 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2168 struct kvm_ppc_smmu_info info;
2169 long rampagesize, best_page_shift;
2172 /* Find the largest hardware supported page size that's less than
2173 * or equal to the (logical) backing page size of guest RAM */
2174 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2175 rampagesize = qemu_getrampagesize();
2176 best_page_shift = 0;
2178 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2179 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2181 if (!sps->page_shift) {
2185 if ((sps->page_shift > best_page_shift)
2186 && ((1UL << sps->page_shift) <= rampagesize)) {
2187 best_page_shift = sps->page_shift;
2191 return MIN(current_size,
2192 1ULL << (best_page_shift + hash_shift - 7));
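/*
 * Worked example (arithmetic illustration only): if the best supported page
 * shift works out to 16 (64 KiB) and hash_shift is 24 (a 16 MiB HPT), the
 * bound is 1ULL << (16 + 24 - 7) = 8 GiB, and the returned RMA size is the
 * smaller of that and current_size.
 */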
2196 bool kvmppc_spapr_use_multitce(void)
2198 return cap_spapr_multitce;
2201 int kvmppc_spapr_enable_inkernel_multitce(void)
2205 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2206 H_PUT_TCE_INDIRECT, 1);
2208 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2215 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2216 uint64_t bus_offset, uint32_t nb_table,
2217 int *pfd, bool need_vfio)
2223 /* Must set fd to -1 so we don't try to munmap when called for
2224 * destroying the table, which the upper layers -will- do
2227 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2231 if (cap_spapr_tce_64) {
2232 struct kvm_create_spapr_tce_64 args = {
2234 .page_shift = page_shift,
2235 .offset = bus_offset >> page_shift,
2239 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2242 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2246 } else if (cap_spapr_tce) {
2247 uint64_t window_size = (uint64_t) nb_table << page_shift;
2248 struct kvm_create_spapr_tce args = {
2250 .window_size = window_size,
2252 if ((window_size != args.window_size) || bus_offset) {
2255 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2257 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2265 len = nb_table * sizeof(uint64_t);
2266 /* FIXME: round this up to page size */
2268 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2269 if (table == MAP_FAILED) {
2270 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2280 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2288 len = nb_table * sizeof(uint64_t);
2289 if ((munmap(table, len) < 0) ||
2291 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2293 /* Leak the table */
2299 int kvmppc_reset_htab(int shift_hint)
2301 uint32_t shift = shift_hint;
2303 if (!kvm_enabled()) {
2304 /* Full emulation, tell caller to allocate htab itself */
2307 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2309 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2310 if (ret == -ENOTTY) {
2311 /* At least some versions of PR KVM advertise the
2312 * capability, but don't implement the ioctl(). Oops.
2313 * Return 0 so that we allocate the htab in qemu, as is
2314 * correct for PR. */
2316 } else if (ret < 0) {
2322 /* We have a kernel that predates the htab reset calls. For PR
2323 * KVM, we need to allocate the htab ourselves, while an HV KVM of
2324 * this era has already allocated a fixed 16MB hash table. */
2325 if (kvmppc_is_pr(kvm_state)) {
2326 /* PR - tell caller to allocate htab */
2329 /* HV - assume 16MB kernel allocated htab */
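/*
 * Return-value convention recap (illustration, inferred from the comments
 * above): 0 means "QEMU must allocate the HPT itself" (TCG, PR KVM, or a PR
 * kernel that advertises but doesn't implement the ioctl), while a positive
 * value is the shift of a hash table the kernel has already provided.
 */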
2334 static inline uint32_t mfpvr(void)
2343 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2352 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2354 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2355 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2356 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2358 /* Now fix up the class with information we can query from the host */
2361 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2362 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2363 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2364 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2365 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2366 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2368 if (dcache_size != -1) {
2369 pcc->l1_dcache_size = dcache_size;
2372 if (icache_size != -1) {
2373 pcc->l1_icache_size = icache_size;
2376 #if defined(TARGET_PPC64)
2377 pcc->radix_page_info = kvm_get_radix_page_info();
2379 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2381 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2382 * compliant. More importantly, advertising ISA 3.00
2383 * architected mode may prevent guests from activating
2384 * necessary DD1 workarounds.
2386 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2387 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2389 #endif /* defined(TARGET_PPC64) */
2392 bool kvmppc_has_cap_epr(void)
2397 bool kvmppc_has_cap_fixup_hcalls(void)
2399 return cap_fixup_hcalls;
2402 bool kvmppc_has_cap_htm(void)
2407 bool kvmppc_has_cap_mmu_radix(void)
2409 return cap_mmu_radix;
2412 bool kvmppc_has_cap_mmu_hash_v3(void)
2414 return cap_mmu_hash_v3;
2417 static bool kvmppc_power8_host(void)
2422 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2423 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2424 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2425 (base_pvr == CPU_POWERPC_POWER8_BASE);
2427 #endif /* TARGET_PPC64 */
2431 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2433 bool l1d_thread_priv_req = !kvmppc_power8_host();
2435 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2437 } else if ((!l1d_thread_priv_req ||
2438 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2439 (c.character & c.character_mask
2440 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2447 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2449 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2451 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2458 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2460 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2461 return SPAPR_CAP_FIXED_CCD;
2462 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2463 return SPAPR_CAP_FIXED_IBS;
static void kvmppc_get_cpu_characteristics(KVMState *s)
{
    struct kvm_ppc_cpu_char c;
    int ret;

    /* Assume broken until the kernel tells us otherwise */
    cap_ppc_safe_cache = 0;
    cap_ppc_safe_bounds_check = 0;
    cap_ppc_safe_indirect_branch = 0;

    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
    if (!ret) {
        return;
    }
    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
    if (ret < 0) {
        return;
    }

    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
}
int kvmppc_get_cap_safe_cache(void)
{
    return cap_ppc_safe_cache;
}

int kvmppc_get_cap_safe_bounds_check(void)
{
    return cap_ppc_safe_bounds_check;
}

int kvmppc_get_cap_safe_indirect_branch(void)
{
    return cap_ppc_safe_indirect_branch;
}

bool kvmppc_has_cap_spapr_vfio(void)
{
    return cap_spapr_vfio;
}
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}
static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
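
/*
 * Illustrative sketch of a caller: the kernel is handed both the RTAS
 * token value and the function name so the corresponding RTAS call can
 * be serviced in-kernel.  The token and name below are hypothetical
 * example values and this helper is unused.
 */
static G_GNUC_UNUSED int example_define_rtas_token(void)
{
    /* 0 on success, -ENOENT if the kernel lacks KVM_CAP_PPC_RTAS */
    return kvmppc_define_rtas_kernel_token(0x2000, "ibm,set-xive");
}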
int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = index,
    };
    int ret;

    if (!cap_htab_fd) {
        error_setg(errp, "KVM version doesn't support %s the HPT",
                   write ? "writing" : "reading");
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
    if (ret < 0) {
        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
                   write ? "writing" : "reading", write ? "to" : "from",
                   strerror(errno));
        return -errno;
    }

    return ret;
}
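
/*
 * Illustrative sketch of how the fd returned above is consumed: a
 * read-only HPT fd starts streaming struct kvm_get_htab_header chunks
 * at the requested index and must be closed by the caller.  The
 * &error_abort handling is for brevity only; this helper is
 * hypothetical and unused.
 */
static G_GNUC_UNUSED void example_htab_fd_usage(void)
{
    int fd = kvmppc_get_htab_fd(false, 0, &error_abort);

    /* ... read struct kvm_get_htab_header chunks from fd ... */

    close(fd);
}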
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;

            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
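
/*
 * Each chunk that kvmppc_save_htab() forwards into the stream is laid
 * out as
 *
 *   be32 index      first HPTE covered by the chunk
 *   be16 n_valid    number of valid PTEs whose contents follow
 *   be16 n_invalid  number of invalidated PTEs (no data follows)
 *   n_valid * HASH_PTE_SIZE_64 bytes of PTE data
 *
 * mirroring struct kvm_get_htab_header as read from the kernel fd;
 * kvmppc_load_htab_chunk() below consumes exactly one such chunk.  The
 * helper below is only an illustrative sketch of a save iterator; the
 * buffer size and time budget are hypothetical values.
 */
static G_GNUC_UNUSED int example_htab_save_iterate(QEMUFile *f, int htab_fd)
{
    /* Stream at most 16kB per call, spending at most ~1ms draining the fd */
    return kvmppc_save_htab(f, htab_fd, 16384, 1000000);
}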
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }

            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}
void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }

    close(fd);
}
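
/*
 * Illustrative sketch: updating one HPTE through the write helper and
 * reading it back through the read helper.  The index and PTE dwords
 * are hypothetical values and this helper is unused.
 */
static G_GNUC_UNUSED void example_hpte_roundtrip(void)
{
    ppc_hash_pte64_t pte;

    kvmppc_write_hpte(42, 0x8000000000000001ULL, 0x0000000000000190ULL);
    kvmppc_read_hptes(&pte, 42, 1);
}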
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}
void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fall back on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}
int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}
int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}
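
/*
 * Illustrative sketch, not the actual spapr flow: HPT resizing is a
 * two-phase protocol.  The caller first asks the kernel to prepare a
 * new hash table of 2^shift bytes, then commits the switch once the
 * preparation is done.  "Busy, try again" style return values from the
 * prepare ioctl are not handled here; the helper and its flags value
 * are hypothetical and unused.
 */
static G_GNUC_UNUSED int example_resize_hpt(PowerPCCPU *cpu, int shift)
{
    int rc = kvmppc_resize_hpt_prepare(cpu, 0, shift);

    if (rc < 0) {
        return rc;
    }

    return kvmppc_resize_hpt_commit(cpu, 0, shift);
}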
 * This is a helper function to detect a post migration scenario
 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true. The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We need to avoid as much as possible querying the running KVM type
 * at QEMU level.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}
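
/*
 * Illustrative sketch: migration post-load code can consult the helper
 * above and fall back to the real host PVR before registers are pushed
 * into KVM.  Using mfpvr() for the reset value is an assumption made
 * for this sketch; the actual workaround lives in the cpu_post_load
 * path and this helper is hypothetical and unused.
 */
static G_GNUC_UNUSED void example_apply_pvr_workaround(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;

    if (kvmppc_pvr_workaround_required(cpu)) {
        /* KVM-HV only accepts the host's own PVR in KVM_SET_SREGS */
        env->spr[SPR_PVR] = mfpvr();
    }
}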