2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
51 #include "sysemu/kvm_int.h"
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60 do { } while (0)
61 #endif
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_tce_64;
78 static int cap_spapr_multitce;
79 static int cap_spapr_vfio;
81 static int cap_one_reg;
83 static int cap_ppc_watchdog;
85 static int cap_htab_fd;
86 static int cap_fixup_hcalls;
87 static int cap_htm; /* Hardware transactional memory support */
88 static int cap_mmu_radix;
89 static int cap_mmu_hash_v3;
90 static int cap_resize_hpt;
91 static int cap_ppc_pvr_compat;
93 static uint32_t debug_inst_opcode;
95 /* XXX We have a race condition where we actually have a level-triggered
96 * interrupt, but the infrastructure can't expose that yet, so the guest
97 * takes the interrupt but ignores it, goes to sleep and never gets notified
98 * that there's still an interrupt pending.
100 * As a quick workaround, let's just wake up again 20 ms after we injected
101 * an interrupt. That way we can ensure that we're always reinjecting
102 * interrupts in case the guest swallowed them.
104 static QEMUTimer *idle_timer;
106 static void kvm_kick_cpu(void *opaque)
108 PowerPCCPU *cpu = opaque;
110 qemu_cpu_kick(CPU(cpu));
113 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
114 * should only be used for fallback tests - generally we should use
115 * explicit capabilities for the features we want, rather than
116 * assuming what is/isn't available depending on the KVM variant. */
117 static bool kvmppc_is_pr(KVMState *ks)
119 /* Assume KVM-PR if the GET_PVINFO capability is available */
120 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
125 int kvm_arch_init(MachineState *ms, KVMState *s)
127 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
128 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
129 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
130 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
131 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
132 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
133 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
134 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
135 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
136 cap_spapr_vfio = false;
137 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
138 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
139 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
140 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
141 /* Note: we don't set cap_papr here, because this capability is
142 * only activated after this by kvmppc_set_papr() */
143 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
144 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
145 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
146 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
147 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
148 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
149 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
151 * Note: setting it to false because there is no such capability
152 * in KVM at this moment.
154 * TODO: call kvm_vm_check_extension() with the right capability
155 * after the kernel starts implementing it. */
156 cap_ppc_pvr_compat = false;
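/*
 * Illustrative sketch only: if the kernel ever gains a capability for
 * PVR compat mode, the hard-coded false above could become a real
 * check.  KVM_CAP_PPC_PVR_COMPAT is an assumed name, not an existing
 * KVM capability.
 *
 *     cap_ppc_pvr_compat = kvm_vm_check_extension(s, KVM_CAP_PPC_PVR_COMPAT);
 */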
158 if (!cap_interrupt_level) {
159 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
160 "VM to stall at times!\n");
163 kvm_ppc_register_host_cpu_type(ms);
168 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
173 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
175 CPUPPCState *cenv = &cpu->env;
176 CPUState *cs = CPU(cpu);
177 struct kvm_sregs sregs;
180 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
181 /* What we're really trying to say is "if we're on BookE, we use
182    the native PVR for now". This is the only sane way to check
183    it though, and it may mislead users into thinking they can run
184    BookE guests on BookS. Let's hope nobody dares enough :) */
188 fprintf(stderr, "kvm error: missing PVR setting capability\n");
193 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
198 sregs.pvr = cenv->spr[SPR_PVR];
199 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
202 /* Set up a shared TLB array with KVM */
203 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
205 CPUPPCState *env = &cpu->env;
206 CPUState *cs = CPU(cpu);
207 struct kvm_book3e_206_tlb_params params = {};
208 struct kvm_config_tlb cfg = {};
209 unsigned int entries = 0;
212 if (!kvm_enabled() ||
213 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
219 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
220 params.tlb_sizes[i] = booke206_tlb_size(env, i);
221 params.tlb_ways[i] = booke206_tlb_ways(env, i);
222 entries += params.tlb_sizes[i];
225 assert(entries == env->nb_tlb);
226 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
228 env->tlb_dirty = true;
230 cfg.array = (uintptr_t)env->tlb.tlbm;
231 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
232 cfg.params = (uintptr_t)&params;
233 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
235 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
237 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
238 __func__, strerror(-ret));
242 env->kvm_sw_tlb = true;
247 #if defined(TARGET_PPC64)
248 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
249 struct kvm_ppc_smmu_info *info)
251 CPUPPCState *env = &cpu->env;
252 CPUState *cs = CPU(cpu);
254 memset(info, 0, sizeof(*info));
256 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
257 * need to "guess" what the supported page sizes are.
259 * For that to work we make a few assumptions:
261 * - Check whether we are running "PR" KVM which only supports 4K
262 * and 16M pages, but supports them regardless of the backing
263 * store characteristics. We also don't support 1T segments.
265 * This is safe as if HV KVM ever supports that capability or PR
266 * KVM grows support for more page/segment sizes, those versions
267 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
268 * will not hit this fallback
270 * - Else we are running HV KVM. This means we only support page
271 * sizes that fit in the backing store. Additionally we only
272 * advertise 64K pages if the processor is ARCH 2.06 and we assume
273 * P7 encodings for the SLB and hash table. Here too, we assume
274 * support for any newer processor will mean a kernel that
275 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
278 if (kvmppc_is_pr(cs->kvm_state)) {
283 /* Standard 4k base page size segment */
284 info->sps[0].page_shift = 12;
285 info->sps[0].slb_enc = 0;
286 info->sps[0].enc[0].page_shift = 12;
287 info->sps[0].enc[0].pte_enc = 0;
289 /* Standard 16M large page size segment */
290 info->sps[1].page_shift = 24;
291 info->sps[1].slb_enc = SLB_VSID_L;
292 info->sps[1].enc[0].page_shift = 24;
293 info->sps[1].enc[0].pte_enc = 0;
297 /* HV KVM has backing store size restrictions */
298 info->flags = KVM_PPC_PAGE_SIZES_REAL;
300 if (env->mmu_model & POWERPC_MMU_1TSEG) {
301 info->flags |= KVM_PPC_1T_SEGMENTS;
304 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
305 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
311 /* Standard 4k base page size segment */
312 info->sps[i].page_shift = 12;
313 info->sps[i].slb_enc = 0;
314 info->sps[i].enc[0].page_shift = 12;
315 info->sps[i].enc[0].pte_enc = 0;
318 /* 64K on MMU 2.06 and later */
319 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
320 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
321 info->sps[i].page_shift = 16;
322 info->sps[i].slb_enc = 0x110;
323 info->sps[i].enc[0].page_shift = 16;
324 info->sps[i].enc[0].pte_enc = 1;
328 /* Standard 16M large page size segment */
329 info->sps[i].page_shift = 24;
330 info->sps[i].slb_enc = SLB_VSID_L;
331 info->sps[i].enc[0].page_shift = 24;
332 info->sps[i].enc[0].pte_enc = 0;
336 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
338 CPUState *cs = CPU(cpu);
341 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
342 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
348 kvm_get_fallback_smmu_info(cpu, info);
351 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
353 KVMState *s = KVM_STATE(current_machine->accelerator);
354 struct ppc_radix_page_info *radix_page_info;
355 struct kvm_ppc_rmmu_info rmmu_info;
358 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
361 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
364 radix_page_info = g_malloc0(sizeof(*radix_page_info));
365 radix_page_info->count = 0;
366 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
367 if (rmmu_info.ap_encodings[i]) {
368 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
369 radix_page_info->count++;
372 return radix_page_info;
375 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
376 bool radix, bool gtse,
379 CPUState *cs = CPU(cpu);
382 struct kvm_ppc_mmuv3_cfg cfg = {
383 .process_table = proc_tbl,
387 flags |= KVM_PPC_MMUV3_RADIX;
390 flags |= KVM_PPC_MMUV3_GTSE;
393 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
400 return H_NOT_AVAILABLE;
406 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
408 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412 return (1ul << shift) <= rampgsize;
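/*
 * Worked example (for illustration, not an exhaustive spec): with HV KVM
 * (KVM_PPC_PAGE_SIZES_REAL set) and 64 KiB backing pages
 * (rampgsize == 0x10000), a 64 KiB guest page (shift == 16) is accepted
 * because 1ul << 16 <= 0x10000, while a 16 MiB page (shift == 24) is
 * rejected.  Without KVM_PPC_PAGE_SIZES_REAL every size is accepted.
 */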
415 static long max_cpu_page_size;
417 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
419 static struct kvm_ppc_smmu_info smmu_info;
420 static bool has_smmu_info;
421 CPUPPCState *env = &cpu->env;
423 bool has_64k_pages = false;
425 /* We only handle page sizes for 64-bit server guests for now */
426 if (!(env->mmu_model & POWERPC_MMU_64)) {
430 /* Collect MMU info from kernel if not already */
431 if (!has_smmu_info) {
432 kvm_get_smmu_info(cpu, &smmu_info);
433 has_smmu_info = true;
436 if (!max_cpu_page_size) {
437 max_cpu_page_size = qemu_getrampagesize();
440 /* Convert to QEMU form */
441 memset(&env->sps, 0, sizeof(env->sps));
443 /* If we have HV KVM, we need to forbid CI large pages if our
444 * host page size is smaller than 64K.
446 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
447 env->ci_large_pages = getpagesize() >= 0x10000;
451 * XXX This loop should be an entry wide AND of the capabilities that
452 * the selected CPU has with the capabilities that KVM supports.
454 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
455 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
456 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
458 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
462 qsps->page_shift = ksps->page_shift;
463 qsps->slb_enc = ksps->slb_enc;
464 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
465 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
466 ksps->enc[jk].page_shift)) {
469 if (ksps->enc[jk].page_shift == 16) {
470 has_64k_pages = true;
472 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
473 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
474 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
478 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
482 env->slb_nr = smmu_info.slb_size;
483 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
484 env->mmu_model &= ~POWERPC_MMU_1TSEG;
486 if (!has_64k_pages) {
487 env->mmu_model &= ~POWERPC_MMU_64K;
491 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
493 Object *mem_obj = object_resolve_path(obj_path, NULL);
494 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
498 pagesize = qemu_mempath_getpagesize(mempath);
501 pagesize = getpagesize();
504 return pagesize >= max_cpu_page_size;
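/*
 * Hypothetical usage sketch (the object path and error handling are
 * assumptions, not taken from this file): a machine setup path could
 * reject a memory backend whose pages are smaller than the largest
 * page size the CPU wants to use.
 *
 *     if (!kvmppc_is_mem_backend_page_size_ok("/objects/mem0")) {
 *         error_report("memory backend page size too small for guest pages");
 *         exit(1);
 *     }
 */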
507 #else /* defined (TARGET_PPC64) */
509 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
513 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
518 #endif /* !defined (TARGET_PPC64) */
520 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
522 return POWERPC_CPU(cpu)->vcpu_id;
525 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
526 * book3s supports only 1 watchpoint, so an array size
527 * of 4 is sufficient for now.
529 #define MAX_HW_BKPTS 4
531 static struct HWBreakpoint {
534 } hw_debug_points[MAX_HW_BKPTS];
536 static CPUWatchpoint hw_watchpoint;
538 /* By default, no hardware breakpoints or watchpoints are supported */
539 static int max_hw_breakpoint;
540 static int max_hw_watchpoint;
541 static int nb_hw_breakpoint;
542 static int nb_hw_watchpoint;
544 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
546 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
547 max_hw_breakpoint = 2;
548 max_hw_watchpoint = 2;
551 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
552 fprintf(stderr, "Error initializing h/w breakpoints\n");
557 int kvm_arch_init_vcpu(CPUState *cs)
559 PowerPCCPU *cpu = POWERPC_CPU(cs);
560 CPUPPCState *cenv = &cpu->env;
563 /* Gather server mmu info from KVM and update the CPU state */
564 kvm_fixup_page_sizes(cpu);
566 /* Synchronize sregs with kvm */
567 ret = kvm_arch_sync_sregs(cpu);
569 if (ret == -EINVAL) {
570 error_report("Register sync failed... If you're using kvm-hv.ko,"
571 " only \"-cpu host\" is possible");
576 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
578 switch (cenv->mmu_model) {
579 case POWERPC_MMU_BOOKE206:
580 /* This target supports access to KVM's guest TLB */
581 ret = kvm_booke206_tlb_init(cpu);
583 case POWERPC_MMU_2_07:
584 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
585 /* KVM-HV has transactional memory on POWER8 also without the
586 * KVM_CAP_PPC_HTM extension, so enable it here instead as
587 * long as it's available to userspace on the host. */
588 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
597 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
598 kvmppc_hw_debug_points_init(cenv);
603 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
605 CPUPPCState *env = &cpu->env;
606 CPUState *cs = CPU(cpu);
607 struct kvm_dirty_tlb dirty_tlb;
608 unsigned char *bitmap;
611 if (!env->kvm_sw_tlb) {
615 bitmap = g_malloc((env->nb_tlb + 7) / 8);
616 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
618 dirty_tlb.bitmap = (uintptr_t)bitmap;
619 dirty_tlb.num_dirty = env->nb_tlb;
621 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
623 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
624 __func__, strerror(-ret));
630 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
632 PowerPCCPU *cpu = POWERPC_CPU(cs);
633 CPUPPCState *env = &cpu->env;
638 struct kvm_one_reg reg = {
640 .addr = (uintptr_t) &val,
644 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
646 trace_kvm_failed_spr_get(spr, strerror(errno));
648 switch (id & KVM_REG_SIZE_MASK) {
649 case KVM_REG_SIZE_U32:
650 env->spr[spr] = val.u32;
653 case KVM_REG_SIZE_U64:
654 env->spr[spr] = val.u64;
658 /* Don't handle this size yet */
664 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
666 PowerPCCPU *cpu = POWERPC_CPU(cs);
667 CPUPPCState *env = &cpu->env;
672 struct kvm_one_reg reg = {
674 .addr = (uintptr_t) &val,
678 switch (id & KVM_REG_SIZE_MASK) {
679 case KVM_REG_SIZE_U32:
680 val.u32 = env->spr[spr];
683 case KVM_REG_SIZE_U64:
684 val.u64 = env->spr[spr];
688 /* Don't handle this size yet */
692 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
694 trace_kvm_failed_spr_set(spr, strerror(errno));
698 static int kvm_put_fp(CPUState *cs)
700 PowerPCCPU *cpu = POWERPC_CPU(cs);
701 CPUPPCState *env = &cpu->env;
702 struct kvm_one_reg reg;
706 if (env->insns_flags & PPC_FLOAT) {
707 uint64_t fpscr = env->fpscr;
708 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
710 reg.id = KVM_REG_PPC_FPSCR;
711 reg.addr = (uintptr_t)&fpscr;
712 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
714 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
718 for (i = 0; i < 32; i++) {
721 #ifdef HOST_WORDS_BIGENDIAN
722 vsr[0] = float64_val(env->fpr[i]);
723 vsr[1] = env->vsr[i];
725 vsr[0] = env->vsr[i];
726 vsr[1] = float64_val(env->fpr[i]);
728 reg.addr = (uintptr_t) &vsr;
729 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
731 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
733 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
740 if (env->insns_flags & PPC_ALTIVEC) {
741 reg.id = KVM_REG_PPC_VSCR;
742 reg.addr = (uintptr_t)&env->vscr;
743 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
749 for (i = 0; i < 32; i++) {
750 reg.id = KVM_REG_PPC_VR(i);
751 reg.addr = (uintptr_t)&env->avr[i];
752 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
754 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
763 static int kvm_get_fp(CPUState *cs)
765 PowerPCCPU *cpu = POWERPC_CPU(cs);
766 CPUPPCState *env = &cpu->env;
767 struct kvm_one_reg reg;
771 if (env->insns_flags & PPC_FLOAT) {
773 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
775 reg.id = KVM_REG_PPC_FPSCR;
776 reg.addr = (uintptr_t)&fpscr;
777 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
779 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
785 for (i = 0; i < 32; i++) {
788 reg.addr = (uintptr_t) &vsr;
789 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
791 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
793 DPRINTF("Unable to get %s%d from KVM: %s\n",
794 vsx ? "VSR" : "FPR", i, strerror(errno));
797 #ifdef HOST_WORDS_BIGENDIAN
798 env->fpr[i] = vsr[0];
800 env->vsr[i] = vsr[1];
803 env->fpr[i] = vsr[1];
805 env->vsr[i] = vsr[0];
812 if (env->insns_flags & PPC_ALTIVEC) {
813 reg.id = KVM_REG_PPC_VSCR;
814 reg.addr = (uintptr_t)&env->vscr;
815 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
821 for (i = 0; i < 32; i++) {
822 reg.id = KVM_REG_PPC_VR(i);
823 reg.addr = (uintptr_t)&env->avr[i];
824 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
826 DPRINTF("Unable to get VR%d from KVM: %s\n",
836 #if defined(TARGET_PPC64)
837 static int kvm_get_vpa(CPUState *cs)
839 PowerPCCPU *cpu = POWERPC_CPU(cs);
840 CPUPPCState *env = &cpu->env;
841 struct kvm_one_reg reg;
844 reg.id = KVM_REG_PPC_VPA_ADDR;
845 reg.addr = (uintptr_t)&env->vpa_addr;
846 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
848 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
852 assert((uintptr_t)&env->slb_shadow_size
853 == ((uintptr_t)&env->slb_shadow_addr + 8));
854 reg.id = KVM_REG_PPC_VPA_SLB;
855 reg.addr = (uintptr_t)&env->slb_shadow_addr;
856 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
858 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
863 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
864 reg.id = KVM_REG_PPC_VPA_DTL;
865 reg.addr = (uintptr_t)&env->dtl_addr;
866 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
868 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
876 static int kvm_put_vpa(CPUState *cs)
878 PowerPCCPU *cpu = POWERPC_CPU(cs);
879 CPUPPCState *env = &cpu->env;
880 struct kvm_one_reg reg;
883 /* SLB shadow or DTL can't be registered unless a master VPA is
884 * registered. That means when restoring state, if a VPA *is*
885 * registered, we need to set that up first. If not, we need to
886 * deregister the others before deregistering the master VPA */
887 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
890 reg.id = KVM_REG_PPC_VPA_ADDR;
891 reg.addr = (uintptr_t)&env->vpa_addr;
892 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
894 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
899 assert((uintptr_t)&env->slb_shadow_size
900 == ((uintptr_t)&env->slb_shadow_addr + 8));
901 reg.id = KVM_REG_PPC_VPA_SLB;
902 reg.addr = (uintptr_t)&env->slb_shadow_addr;
903 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
905 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
909 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
910 reg.id = KVM_REG_PPC_VPA_DTL;
911 reg.addr = (uintptr_t)&env->dtl_addr;
912 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
914 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
919 if (!env->vpa_addr) {
920 reg.id = KVM_REG_PPC_VPA_ADDR;
921 reg.addr = (uintptr_t)&env->vpa_addr;
922 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
924 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
931 #endif /* TARGET_PPC64 */
933 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
935 CPUPPCState *env = &cpu->env;
936 struct kvm_sregs sregs;
939 sregs.pvr = env->spr[SPR_PVR];
942 PPCVirtualHypervisorClass *vhc =
943 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
944 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
946 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
951 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
952 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
953 if (env->slb[i].esid & SLB_ESID_V) {
954 sregs.u.s.ppc64.slb[i].slbe |= i;
956 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
961 for (i = 0; i < 16; i++) {
962 sregs.u.s.ppc32.sr[i] = env->sr[i];
966 for (i = 0; i < 8; i++) {
967 /* Beware. We have to swap upper and lower bits here */
968 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
970 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
974 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
977 int kvm_arch_put_registers(CPUState *cs, int level)
979 PowerPCCPU *cpu = POWERPC_CPU(cs);
980 CPUPPCState *env = &cpu->env;
981 struct kvm_regs regs;
985 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
992 regs.xer = cpu_read_xer(env);
996 regs.srr0 = env->spr[SPR_SRR0];
997 regs.srr1 = env->spr[SPR_SRR1];
999 regs.sprg0 = env->spr[SPR_SPRG0];
1000 regs.sprg1 = env->spr[SPR_SPRG1];
1001 regs.sprg2 = env->spr[SPR_SPRG2];
1002 regs.sprg3 = env->spr[SPR_SPRG3];
1003 regs.sprg4 = env->spr[SPR_SPRG4];
1004 regs.sprg5 = env->spr[SPR_SPRG5];
1005 regs.sprg6 = env->spr[SPR_SPRG6];
1006 regs.sprg7 = env->spr[SPR_SPRG7];
1008 regs.pid = env->spr[SPR_BOOKE_PID];
1010 for (i = 0; i < 32; i++)
1011 regs.gpr[i] = env->gpr[i];
1014 for (i = 0; i < 8; i++) {
1015 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1018 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1024 if (env->tlb_dirty) {
1025 kvm_sw_tlb_put(cpu);
1026 env->tlb_dirty = false;
1029 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1030 ret = kvmppc_put_books_sregs(cpu);
1036 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1037 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1043 /* We deliberately ignore errors here: for kernels which have
1044 * the ONE_REG calls but don't support the specific
1045 * registers, there's a reasonable chance things will still
1046 * work, at least until we try to migrate. */
1047 for (i = 0; i < 1024; i++) {
1048 uint64_t id = env->spr_cb[i].one_reg_id;
1051 kvm_put_one_spr(cs, id, i);
1057 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1060 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1070 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1071 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1072 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1076 if (kvm_put_vpa(cs) < 0) {
1077 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1081 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1082 #endif /* TARGET_PPC64 */
1088 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1090 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1093 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1095 CPUPPCState *env = &cpu->env;
1096 struct kvm_sregs sregs;
1099 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1104 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1105 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1106 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1107 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1108 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1109 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1110 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1111 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1112 env->spr[SPR_DECR] = sregs.u.e.dec;
1113 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1114 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1115 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1118 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1119 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1120 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1121 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1122 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1123 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1126 if (sregs.u.e.features & KVM_SREGS_E_64) {
1127 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1130 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1131 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1134 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1135 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1136 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1137 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1138 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1139 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1140 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1141 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1142 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1143 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1144 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1145 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1146 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1147 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1148 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1149 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1150 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1151 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1152 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1153 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1154 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1155 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1156 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1157 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1158 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1159 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1160 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1161 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1162 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1163 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1164 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1165 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1166 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1168 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1169 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1170 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1171 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1172 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1173 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1174 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1177 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1178 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1179 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1182 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1183 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1184 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1185 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1186 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1190 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1191 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1192 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1193 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1194 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1195 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1196 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1197 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1198 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1199 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1200 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1203 if (sregs.u.e.features & KVM_SREGS_EXP) {
1204 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1207 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1208 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1209 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1212 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1213 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1214 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1215 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1217 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1218 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1219 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1226 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1228 CPUPPCState *env = &cpu->env;
1229 struct kvm_sregs sregs;
1233 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1239 ppc_store_sdr1(env, sregs.u.s.sdr1);
1245 * The packed SLB array we get from KVM_GET_SREGS only contains
1246 * information about valid entries. So we flush our internal copy
1247 * to get rid of stale ones, then put all valid SLB entries back
1250 memset(env->slb, 0, sizeof(env->slb));
1251 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1252 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1253 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1255 * Only restore valid entries
1257 if (rb & SLB_ESID_V) {
1258 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1264 for (i = 0; i < 16; i++) {
1265 env->sr[i] = sregs.u.s.ppc32.sr[i];
1269 for (i = 0; i < 8; i++) {
1270 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1271 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1272 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1273 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1279 int kvm_arch_get_registers(CPUState *cs)
1281 PowerPCCPU *cpu = POWERPC_CPU(cs);
1282 CPUPPCState *env = &cpu->env;
1283 struct kvm_regs regs;
1287 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1292 for (i = 7; i >= 0; i--) {
1293 env->crf[i] = cr & 15;
1297 env->ctr = regs.ctr;
1299 cpu_write_xer(env, regs.xer);
1300 env->msr = regs.msr;
1303 env->spr[SPR_SRR0] = regs.srr0;
1304 env->spr[SPR_SRR1] = regs.srr1;
1306 env->spr[SPR_SPRG0] = regs.sprg0;
1307 env->spr[SPR_SPRG1] = regs.sprg1;
1308 env->spr[SPR_SPRG2] = regs.sprg2;
1309 env->spr[SPR_SPRG3] = regs.sprg3;
1310 env->spr[SPR_SPRG4] = regs.sprg4;
1311 env->spr[SPR_SPRG5] = regs.sprg5;
1312 env->spr[SPR_SPRG6] = regs.sprg6;
1313 env->spr[SPR_SPRG7] = regs.sprg7;
1315 env->spr[SPR_BOOKE_PID] = regs.pid;
1317 for (i = 0; i < 32; i++)
1318 env->gpr[i] = regs.gpr[i];
1322 if (cap_booke_sregs) {
1323 ret = kvmppc_get_booke_sregs(cpu);
1330 ret = kvmppc_get_books_sregs(cpu);
1337 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1343 /* We deliberately ignore errors here: for kernels which have
1344 * the ONE_REG calls but don't support the specific
1345 * registers, there's a reasonable chance things will still
1346 * work, at least until we try to migrate. */
1347 for (i = 0; i < 1024; i++) {
1348 uint64_t id = env->spr_cb[i].one_reg_id;
1351 kvm_get_one_spr(cs, id, i);
1357 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1360 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1370 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1371 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1372 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1376 if (kvm_get_vpa(cs) < 0) {
1377 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1381 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1388 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1390 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1392 if (irq != PPC_INTERRUPT_EXT) {
1396 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1400 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1405 #if defined(TARGET_PPCEMB)
1406 #define PPC_INPUT_INT PPC40x_INPUT_INT
1407 #elif defined(TARGET_PPC64)
1408 #define PPC_INPUT_INT PPC970_INPUT_INT
1410 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1413 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1415 PowerPCCPU *cpu = POWERPC_CPU(cs);
1416 CPUPPCState *env = &cpu->env;
1420 qemu_mutex_lock_iothread();
1422 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1423 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1424 if (!cap_interrupt_level &&
1425 run->ready_for_interrupt_injection &&
1426 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1427 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1429 /* For now KVM disregards the 'irq' argument. However, in the
1430 * future KVM could cache it in-kernel to avoid a heavyweight exit
1431 * when reading the UIC.
1433 irq = KVM_INTERRUPT_SET;
1435 DPRINTF("injected interrupt %d\n", irq);
1436 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1438 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1441 /* Always wake up soon in case the interrupt was level based */
1442 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1443 (NANOSECONDS_PER_SECOND / 50));
1446 /* We don't know if there are more interrupts pending after this. However,
1447 * the guest will return to userspace in the course of handling this one
1448 * anyway, so we will get a chance to deliver the rest. */
1450 qemu_mutex_unlock_iothread();
1453 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1455 return MEMTXATTRS_UNSPECIFIED;
1458 int kvm_arch_process_async_events(CPUState *cs)
1463 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1465 CPUState *cs = CPU(cpu);
1466 CPUPPCState *env = &cpu->env;
1468 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1470 cs->exception_index = EXCP_HLT;
1476 /* map dcr access to existing qemu dcr emulation */
1477 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1479 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1480 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1485 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1487 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1488 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1493 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1495 /* Mixed endian case is not handled */
1496 uint32_t sc = debug_inst_opcode;
1498 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1500 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1507 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1511 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1512 sc != debug_inst_opcode ||
1513 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1521 static int find_hw_breakpoint(target_ulong addr, int type)
1525 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1526 <= ARRAY_SIZE(hw_debug_points));
1528 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1529 if (hw_debug_points[n].addr == addr &&
1530 hw_debug_points[n].type == type) {
1538 static int find_hw_watchpoint(target_ulong addr, int *flag)
1542 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1544 *flag = BP_MEM_ACCESS;
1548 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1550 *flag = BP_MEM_WRITE;
1554 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1556 *flag = BP_MEM_READ;
1563 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1564 target_ulong len, int type)
1566 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1570 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1571 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1574 case GDB_BREAKPOINT_HW:
1575 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1579 if (find_hw_breakpoint(addr, type) >= 0) {
1586 case GDB_WATCHPOINT_WRITE:
1587 case GDB_WATCHPOINT_READ:
1588 case GDB_WATCHPOINT_ACCESS:
1589 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1593 if (find_hw_breakpoint(addr, type) >= 0) {
1607 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1608 target_ulong len, int type)
1612 n = find_hw_breakpoint(addr, type);
1618 case GDB_BREAKPOINT_HW:
1622 case GDB_WATCHPOINT_WRITE:
1623 case GDB_WATCHPOINT_READ:
1624 case GDB_WATCHPOINT_ACCESS:
1631 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1636 void kvm_arch_remove_all_hw_breakpoints(void)
1638 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1641 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1645 /* Software Breakpoint updates */
1646 if (kvm_sw_breakpoints_active(cs)) {
1647 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1650 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1651 <= ARRAY_SIZE(hw_debug_points));
1652 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1654 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1655 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1656 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1657 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1658 switch (hw_debug_points[n].type) {
1659 case GDB_BREAKPOINT_HW:
1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1662 case GDB_WATCHPOINT_WRITE:
1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1665 case GDB_WATCHPOINT_READ:
1666 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1668 case GDB_WATCHPOINT_ACCESS:
1669 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1670 KVMPPC_DEBUG_WATCH_READ;
1673 cpu_abort(cs, "Unsupported breakpoint type\n");
1675 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1680 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1682 CPUState *cs = CPU(cpu);
1683 CPUPPCState *env = &cpu->env;
1684 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1689 if (cs->singlestep_enabled) {
1691 } else if (arch_info->status) {
1692 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1693 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1694 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1698 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1699 KVMPPC_DEBUG_WATCH_WRITE)) {
1700 n = find_hw_watchpoint(arch_info->address, &flag);
1703 cs->watchpoint_hit = &hw_watchpoint;
1704 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1705 hw_watchpoint.flags = flag;
1709 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1712 /* QEMU is not able to handle the debug exception, so inject a
1713 * program exception into the guest;
1714 * yes, a program exception, NOT a debug exception!
1715 * When QEMU is using debug resources then the debug exception must
1716 * always be set. To achieve this we set MSR_DE and also set
1717 * MSRP_DEP so the guest cannot change MSR_DE.
1718 * When emulating debug resources for the guest we want the guest
1719 * to control MSR_DE (enable/disable the debug interrupt on demand).
1720 * Supporting both configurations is NOT possible.
1721 * So the result is that we cannot share debug resources
1722 * between QEMU and the guest on the BookE architecture.
1723 * In the current design QEMU gets priority over the guest;
1724 * this means that if QEMU is using debug resources then the guest cannot.
1726 * For software breakpoints QEMU uses a privileged instruction,
1727 * so there cannot be any reason that we are here because the guest
1728 * set a debug exception; the only possibility is that the guest executed a
1729 * privileged / illegal instruction, and that is why we are
1730 * injecting a program interrupt.
1733 cpu_synchronize_state(cs);
1734 /* env->nip is PC, so increment this by 4 to use
1735 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1738 cs->exception_index = POWERPC_EXCP_PROGRAM;
1739 env->error_code = POWERPC_EXCP_INVAL;
1740 ppc_cpu_do_interrupt(cs);
1746 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1748 PowerPCCPU *cpu = POWERPC_CPU(cs);
1749 CPUPPCState *env = &cpu->env;
1752 qemu_mutex_lock_iothread();
1754 switch (run->exit_reason) {
1756 if (run->dcr.is_write) {
1757 DPRINTF("handle dcr write\n");
1758 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1760 DPRINTF("handle dcr read\n");
1761 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1765 DPRINTF("handle halt\n");
1766 ret = kvmppc_handle_halt(cpu);
1768 #if defined(TARGET_PPC64)
1769 case KVM_EXIT_PAPR_HCALL:
1770 DPRINTF("handle PAPR hypercall\n");
1771 run->papr_hcall.ret = spapr_hypercall(cpu,
1773 run->papr_hcall.args);
1778 DPRINTF("handle epr\n");
1779 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1782 case KVM_EXIT_WATCHDOG:
1783 DPRINTF("handle watchdog expiry\n");
1784 watchdog_perform_action();
1788 case KVM_EXIT_DEBUG:
1789 DPRINTF("handle debug exception\n");
1790 if (kvm_handle_debug(cpu, run)) {
1794 /* re-enter, this exception was guest-internal */
1799 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1804 qemu_mutex_unlock_iothread();
1808 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1810 CPUState *cs = CPU(cpu);
1811 uint32_t bits = tsr_bits;
1812 struct kvm_one_reg reg = {
1813 .id = KVM_REG_PPC_OR_TSR,
1814 .addr = (uintptr_t) &bits,
1817 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
1820 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1823 CPUState *cs = CPU(cpu);
1824 uint32_t bits = tsr_bits;
1825 struct kvm_one_reg reg = {
1826 .id = KVM_REG_PPC_CLEAR_TSR,
1827 .addr = (uintptr_t) &bits,
1830 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
1833 int kvmppc_set_tcr(PowerPCCPU *cpu)
1835 CPUState *cs = CPU(cpu);
1836 CPUPPCState *env = &cpu->env;
1837 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1839 struct kvm_one_reg reg = {
1840 .id = KVM_REG_PPC_TCR,
1841 .addr = (uintptr_t) &tcr,
1844 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
1847 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1849 CPUState *cs = CPU(cpu);
1852 if (!kvm_enabled()) {
1856 if (!cap_ppc_watchdog) {
1857 printf("warning: KVM does not support watchdog\n");
1861 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1863 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1864 __func__, strerror(-ret));
1871 static int read_cpuinfo(const char *field, char *value, int len)
1875 int field_len = strlen(field);
1878 f = fopen("/proc/cpuinfo", "r");
1884 if (!fgets(line, sizeof(line), f)) {
1887 if (!strncmp(line, field, field_len)) {
1888 pstrcpy(value, len, line);
1899 uint32_t kvmppc_get_tbfreq(void)
1903 uint32_t retval = NANOSECONDS_PER_SECOND;
1905 if (read_cpuinfo("timebase", line, sizeof(line))) {
1909 if (!(ns = strchr(line, ':'))) {
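/*
 * For reference, the /proc/cpuinfo line parsed above typically looks
 * like this on a ppc64 host (the exact value varies by machine):
 *
 *     timebase        : 512000000
 *
 * kvmppc_get_tbfreq() falls back to NANOSECONDS_PER_SECOND when the
 * field cannot be read.
 */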
1918 bool kvmppc_get_host_serial(char **value)
1920 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1924 bool kvmppc_get_host_model(char **value)
1926 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1929 /* Try to find a device tree node for a CPU with clock-frequency property */
1930 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1932 struct dirent *dirp;
1935 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1936 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1941 while ((dirp = readdir(dp)) != NULL) {
1943 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1945 f = fopen(buf, "r");
1947 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1954 if (buf[0] == '\0') {
1955 printf("Unknown host!\n");
1962 static uint64_t kvmppc_read_int_dt(const char *filename)
1971 f = fopen(filename, "rb");
1976 len = fread(&u, 1, sizeof(u), f);
1980 /* property is a 32-bit quantity */
1981 return be32_to_cpu(u.v32);
1983 return be64_to_cpu(u.v64);
1989 /* Read a CPU node property from the host device tree that's a single
1990 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1991 * (can't find or open the property, or doesn't understand the format). */
1993 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1995 char buf[PATH_MAX], *tmp;
1998 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2002 tmp = g_strdup_printf("%s/%s", buf, propname);
2003 val = kvmppc_read_int_dt(tmp);
2009 uint64_t kvmppc_get_clockfreq(void)
2011 return kvmppc_read_int_cpu_dt("clock-frequency");
2014 uint32_t kvmppc_get_vmx(void)
2016 return kvmppc_read_int_cpu_dt("ibm,vmx");
2019 uint32_t kvmppc_get_dfp(void)
2021 return kvmppc_read_int_cpu_dt("ibm,dfp");
2024 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2026 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2027 CPUState *cs = CPU(cpu);
2029 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2030 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2037 int kvmppc_get_hasidle(CPUPPCState *env)
2039 struct kvm_ppc_pvinfo pvinfo;
2041 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2042 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2049 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2051 uint32_t *hc = (uint32_t*)buf;
2052 struct kvm_ppc_pvinfo pvinfo;
2054 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2055 memcpy(buf, pvinfo.hcall, buf_len);
2060 * Fallback to always fail hypercalls regardless of endianness:
2062 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2064 * b .+8 (becomes nop in wrong endian)
2065 * bswap32(li r3, -1)
2068 hc[0] = cpu_to_be32(0x08000048);
2069 hc[1] = cpu_to_be32(0x3860ffff);
2070 hc[2] = cpu_to_be32(0x48000008);
2071 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2076 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2078 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2081 void kvmppc_enable_logical_ci_hcalls(void)
2084 * FIXME: it would be nice if we could detect the cases where
2085 * we're using a device which requires the in kernel
2086 * implementation of these hcalls, but the kernel lacks them and
2087 * produce a warning.
2089 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2090 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2093 void kvmppc_enable_set_mode_hcall(void)
2095 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2098 void kvmppc_enable_clear_ref_mod_hcalls(void)
2100 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2101 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2104 void kvmppc_set_papr(PowerPCCPU *cpu)
2106 CPUState *cs = CPU(cpu);
2109 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2111 error_report("This vCPU type or KVM version does not support PAPR");
2115 /* Update the capability flag so we sync the right information with KVM */
2120 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2122 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2125 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2127 CPUState *cs = CPU(cpu);
2130 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2131 if (ret && mpic_proxy) {
2132 error_report("This KVM version does not support EPR");
2137 int kvmppc_smt_threads(void)
2139 return cap_ppc_smt ? cap_ppc_smt : 1;
2142 int kvmppc_set_smt_threads(int smt)
2146 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2153 void kvmppc_hint_smt_possible(Error **errp)
2159 assert(kvm_enabled());
2160 if (cap_ppc_smt_possible) {
2161 g = g_string_new("Available VSMT modes:");
2162 for (i = 63; i >= 0; i--) {
2163 if ((1UL << i) & cap_ppc_smt_possible) {
2164 g_string_append_printf(g, " %lu", (1UL << i));
2167 s = g_string_free(g, false);
2168 error_append_hint(errp, "%s.\n", s);
2171 error_append_hint(errp,
2172 "This KVM seems to be too old to support VSMT.\n");
2178 off_t kvmppc_alloc_rma(void **rma)
2182 struct kvm_allocate_rma ret;
2184 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2185 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2186 * not necessary on this hardware;
2187 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware.
2189 * FIXME: We should allow the user to force contiguous RMA
2190 * allocation in the cap_ppc_rma == 1 case.
2192 if (cap_ppc_rma < 2) {
2196 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2198 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2203 size = MIN(ret.rma_size, 256ul << 20);
2205 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2206 if (*rma == MAP_FAILED) {
2207 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2214 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2216 struct kvm_ppc_smmu_info info;
2217 long rampagesize, best_page_shift;
2220 if (cap_ppc_rma >= 2) {
2221 return current_size;
2224 /* Find the largest hardware supported page size that's less than
2225 * or equal to the (logical) backing page size of guest RAM */
2226 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2227 rampagesize = qemu_getrampagesize();
2228 best_page_shift = 0;
2230 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2231 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2233 if (!sps->page_shift) {
2237 if ((sps->page_shift > best_page_shift)
2238 && ((1UL << sps->page_shift) <= rampagesize)) {
2239 best_page_shift = sps->page_shift;
2243 return MIN(current_size,
2244 1ULL << (best_page_shift + hash_shift - 7));
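/*
 * Worked example of the clamp above (numbers chosen for illustration):
 * with 64 KiB backing pages (best_page_shift == 16) and a 128 MiB hash
 * table (hash_shift == 27), the limit is 1ULL << (16 + 27 - 7) = 64 GiB,
 * so current_size is returned unchanged unless it exceeds that.
 */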
2248 bool kvmppc_spapr_use_multitce(void)
2250 return cap_spapr_multitce;
2253 int kvmppc_spapr_enable_inkernel_multitce(void)
2257 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2258 H_PUT_TCE_INDIRECT, 1);
2260 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2267 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2268 uint64_t bus_offset, uint32_t nb_table,
2269 int *pfd, bool need_vfio)
2275 /* Must set fd to -1 so we don't try to munmap when called for
2276 * destroying the table, which the upper layers -will- do
2279 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2283 if (cap_spapr_tce_64) {
2284 struct kvm_create_spapr_tce_64 args = {
2286 .page_shift = page_shift,
2287 .offset = bus_offset >> page_shift,
2291 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2294 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2298 } else if (cap_spapr_tce) {
2299 uint64_t window_size = (uint64_t) nb_table << page_shift;
2300 struct kvm_create_spapr_tce args = {
2302 .window_size = window_size,
2304 if ((window_size != args.window_size) || bus_offset) {
2307 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2309 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2317 len = nb_table * sizeof(uint64_t);
2318 /* FIXME: round this up to page size */
2320 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2321 if (table == MAP_FAILED) {
2322 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2332 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2340 len = nb_table * sizeof(uint64_t);
2341 if ((munmap(table, len) < 0) ||
2343 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2345 /* Leak the table */
2351 int kvmppc_reset_htab(int shift_hint)
2353 uint32_t shift = shift_hint;
2355 if (!kvm_enabled()) {
2356 /* Full emulation, tell caller to allocate htab itself */
2359 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2361 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2362 if (ret == -ENOTTY) {
2363 /* At least some versions of PR KVM advertise the
2364 * capability, but don't implement the ioctl(). Oops.
2365 * Return 0 so that we allocate the htab in qemu, as is
2366 * correct for PR. */
2368 } else if (ret < 0) {
2374 /* We have a kernel that predates the htab reset calls. For PR
2375 * KVM, we need to allocate the htab ourselves; an HV KVM of
2376 * this era has already allocated a fixed 16MB hash table. */
2377 if (kvmppc_is_pr(kvm_state)) {
2378 /* PR - tell caller to allocate htab */
2381 /* HV - assume 16MB kernel allocated htab */
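/*
 * Summary of the return convention of kvmppc_reset_htab(), derived from
 * the code above: a positive value is the shift of a kernel-managed HPT,
 * 0 tells the caller that QEMU must allocate the hash table itself, and
 * a negative value is an error.  Illustrative caller sketch (helper
 * names are assumptions):
 *
 *     shift = kvmppc_reset_htab(requested_shift);
 *     if (shift > 0) {
 *         record_kernel_htab_shift(shift);   (kernel owns the HPT)
 *     } else if (shift == 0) {
 *         allocate_htab_in_qemu();
 *     }
 */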
2386 static inline uint32_t mfpvr(void)
2395 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2404 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2406 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2407 uint32_t vmx = kvmppc_get_vmx();
2408 uint32_t dfp = kvmppc_get_dfp();
2409 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2410 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2412 /* Now fix up the class with information we can query from the host */
2416 /* Only override when we know what the host supports */
2417 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2418 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2421 /* Only override when we know what the host supports */
2422 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2425 if (dcache_size != -1) {
2426 pcc->l1_dcache_size = dcache_size;
2429 if (icache_size != -1) {
2430 pcc->l1_icache_size = icache_size;
2433 #if defined(TARGET_PPC64)
2434 pcc->radix_page_info = kvm_get_radix_page_info();
2436 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2438 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2439 * compliant. More importantly, advertising ISA 3.00
2440 * architected mode may prevent guests from activating
2441 * necessary DD1 workarounds.
2443 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2444 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2446 #endif /* defined(TARGET_PPC64) */
2449 bool kvmppc_has_cap_epr(void)
2454 bool kvmppc_has_cap_fixup_hcalls(void)
2456 return cap_fixup_hcalls;
2459 bool kvmppc_has_cap_htm(void)
2464 bool kvmppc_has_cap_mmu_radix(void)
2466 return cap_mmu_radix;
2469 bool kvmppc_has_cap_mmu_hash_v3(void)
2471 return cap_mmu_hash_v3;
2474 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2476 uint32_t host_pvr = mfpvr();
2477 PowerPCCPUClass *pvr_pcc;
2479 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2480 if (pvr_pcc == NULL) {
2481 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2487 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2489 TypeInfo type_info = {
2490 .name = TYPE_HOST_POWERPC_CPU,
2491 .class_init = kvmppc_host_cpu_class_init,
2493 MachineClass *mc = MACHINE_GET_CLASS(ms);
2494 PowerPCCPUClass *pvr_pcc;
2499 pvr_pcc = kvm_ppc_get_host_cpu_class();
2500 if (pvr_pcc == NULL) {
2503 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2504 type_register(&type_info);
2505 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2506 /* override TCG default cpu type with 'host' cpu model */
2507 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2510 oc = object_class_by_name(type_info.name);
2514 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2515 * we want "POWER8" to be a "family" alias that points to the current
2516 * host CPU type, too)
2518 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2519 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2520 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2523 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2524 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2535 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2537 struct kvm_rtas_token_args args = {
2541 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2545 strncpy(args.name, function, sizeof(args.name));
2547 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2550 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2552 struct kvm_get_htab_fd s = {
2553 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2554 .start_index = index,
2559 error_setg(errp, "KVM version doesn't support %s the HPT",
2560 write ? "writing" : "reading");
2564 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2566 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2567 write ? "writing" : "reading", write ? "to" : "from",
2575 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2577 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2578 uint8_t buf[bufsize];
2582 rc = read(fd, buf, bufsize);
2584 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2588 uint8_t *buffer = buf;
2591 struct kvm_get_htab_header *head =
2592 (struct kvm_get_htab_header *) buffer;
2593 size_t chunksize = sizeof(*head) +
2594 HASH_PTE_SIZE_64 * head->n_valid;
2596 qemu_put_be32(f, head->index);
2597 qemu_put_be16(f, head->n_valid);
2598 qemu_put_be16(f, head->n_invalid);
2599 qemu_put_buffer(f, (void *)(head + 1),
2600 HASH_PTE_SIZE_64 * head->n_valid);
2602 buffer += chunksize;
2608 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2610 return (rc == 0) ? 1 : 0;
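/*
 * For reference, each record that kvmppc_save_htab() emits and
 * kvmppc_load_htab_chunk() consumes has the layout below (all fields
 * big-endian on the wire; HASH_PTE_SIZE_64 is 16 bytes per HPTE):
 *
 *     u32 index      first HPTE index covered by this chunk
 *     u16 n_valid    number of valid HPTEs that follow
 *     u16 n_invalid  number of invalid HPTEs to clear after them
 *     u8  hpte[n_valid * HASH_PTE_SIZE_64]
 */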
2613 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2614 uint16_t n_valid, uint16_t n_invalid)
2616 struct kvm_get_htab_header *buf;
2617 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2620 buf = alloca(chunksize);
2622 buf->n_valid = n_valid;
2623 buf->n_invalid = n_invalid;
2625 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2627 rc = write(fd, buf, chunksize);
2629 fprintf(stderr, "Error writing KVM hash table: %s\n",
2633 if (rc != chunksize) {
2634 /* We should never get a short write on a single chunk */
2635 fprintf(stderr, "Short write, restoring KVM hash table\n");
2641 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2646 void kvm_arch_init_irq_routing(KVMState *s)
2650 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2655 fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2659 struct kvm_get_htab_header *hdr;
2660 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2661 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2663 rc = read(fd, buf, sizeof(buf));
2665 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2668 hdr = (struct kvm_get_htab_header *)buf;
2669 while ((i < n) && ((char *)hdr < (buf + rc))) {
2670 int invalid = hdr->n_invalid;
2672 if (hdr->index != (ptex + i)) {
2673 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2674 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2677 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2680 if ((n - i) < invalid) {
2683 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2684 i += hdr->n_invalid;
2686 hdr = (struct kvm_get_htab_header *)
2687 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2694 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2698 struct kvm_get_htab_header hdr;
2703 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2705 buf.hdr.n_valid = 1;
2706 buf.hdr.n_invalid = 0;
2707 buf.hdr.index = ptex;
2708 buf.pte0 = cpu_to_be64(pte0);
2709 buf.pte1 = cpu_to_be64(pte1);
2711 rc = write(fd, &buf, sizeof(buf));
2712 if (rc != sizeof(buf)) {
2713 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2718 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2719 uint64_t address, uint32_t data, PCIDevice *dev)
2724 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2725 int vector, PCIDevice *dev)
2730 int kvm_arch_release_virq_post(int virq)
2735 int kvm_arch_msi_data_to_gsi(uint32_t data)
2737 return data & 0xffff;
2740 int kvmppc_enable_hwrng(void)
2742 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2746 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2749 void kvmppc_check_papr_resize_hpt(Error **errp)
2751 if (!kvm_enabled()) {
2752 return; /* No KVM, we're good */
2755 if (cap_resize_hpt) {
2756 return; /* Kernel has explicit support, we're good */
2759 /* Otherwise fall back to looking for PR KVM */
2760 if (kvmppc_is_pr(kvm_state)) {
2765 "Hash page table resizing not available with this KVM version");
2768 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2770 CPUState *cs = CPU(cpu);
2771 struct kvm_ppc_resize_hpt rhpt = {
2776 if (!cap_resize_hpt) {
2780 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2783 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2785 CPUState *cs = CPU(cpu);
2786 struct kvm_ppc_resize_hpt rhpt = {
2791 if (!cap_resize_hpt) {
2795 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2799 * This is a helper function to detect a post migration scenario
2800 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2801 * the guest kernel can't handle a PVR value other than the actual host
2802 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2804 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2805 * (so, we're HV), return true. The workaround itself is done in
2808 * The order here is important: we'll only check for KVM PR as a
2809 * fallback if the guest kernel can't handle the situation itself.
2810 * We need to avoid as much as possible querying the running KVM type
2813 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2815 CPUState *cs = CPU(cpu);
2817 if (!kvm_enabled()) {
2821 if (cap_ppc_pvr_compat) {
2825 return !kvmppc_is_pr(cs->kvm_state);