1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 */
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
51 #include "sysemu/kvm_int.h"
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) do { } while (0)
60 #endif
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
80 static int cap_one_reg;
82 static int cap_ppc_watchdog;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
95 static uint32_t debug_inst_opcode;
97 /* XXX We have a race condition where we actually have a level triggered
98 * interrupt, but the infrastructure can't expose that yet, so the guest
99 * takes but ignores it, goes to sleep and never gets notified that there's
100 * still an interrupt pending.
102 * As a quick workaround, let's just wake up again 20 ms after we injected
103 * an interrupt. That way we can assure that we're always reinjecting
104 * interrupts in case the guest swallowed them.
106 static QEMUTimer *idle_timer;
108 static void kvm_kick_cpu(void *opaque)
110 PowerPCCPU *cpu = opaque;
112 qemu_cpu_kick(CPU(cpu));
115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
116 * should only be used for fallback tests - generally we should use
117 * explicit capabilities for the features we want, rather than
118 * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
121 /* Assume KVM-PR if the GET_PVINFO capability is available */
122 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143 /* Note: we don't set cap_papr here, because this capability is
144 * only activated after this by kvmppc_set_papr() */
145 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152 kvmppc_get_cpu_characteristics(s);
154 * Note: setting it to false because there is no such capability
155 * in KVM at this moment.
157 * TODO: call kvm_vm_check_extension() with the right capability
158 * after the kernel starts implementing it. */
159 cap_ppc_pvr_compat = false;
161 if (!cap_interrupt_level) {
162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163 "VM to stall at times!\n");
166 kvm_ppc_register_host_cpu_type(ms);
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 CPUPPCState *cenv = &cpu->env;
179 CPUState *cs = CPU(cpu);
180 struct kvm_sregs sregs;
183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184 /* What we're really trying to say is "if we're on BookE, we use
185 the native PVR for now". This is the only sane way to check
186 it though, even if it may mislead users into thinking they can run
187 BookE guests on BookS. Let's hope nobody dares enough :) */
191 fprintf(stderr, "kvm error: missing PVR setting capability\n");
196 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
201 sregs.pvr = cenv->spr[SPR_PVR];
202 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
210 struct kvm_book3e_206_tlb_params params = {};
211 struct kvm_config_tlb cfg = {};
212 unsigned int entries = 0;
215 if (!kvm_enabled() ||
216 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
220 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223 params.tlb_sizes[i] = booke206_tlb_size(env, i);
224 params.tlb_ways[i] = booke206_tlb_ways(env, i);
225 entries += params.tlb_sizes[i];
228 assert(entries == env->nb_tlb);
229 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 env->tlb_dirty = true;
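/* Describe QEMU's MAS-format TLB array to KVM so that the kernel and
 * QEMU share the same in-memory copy of the guest TLB. */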
233 cfg.array = (uintptr_t)env->tlb.tlbm;
234 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235 cfg.params = (uintptr_t)&params;
236 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
240 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241 __func__, strerror(-ret));
245 env->kvm_sw_tlb = true;
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252 struct kvm_ppc_smmu_info *info)
254 CPUPPCState *env = &cpu->env;
255 CPUState *cs = CPU(cpu);
257 memset(info, 0, sizeof(*info));
259 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
260 * need to "guess" what the supported page sizes are.
262 * For that to work we make a few assumptions:
264 * - Check whether we are running "PR" KVM which only supports 4K
265 * and 16M pages, but supports them regardless of the backing
266 * store characteristics. We also don't support 1T segments.
268 * This is safe as if HV KVM ever supports that capability or PR
269 * KVM grows support for more page/segment sizes, those versions
270 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271 * will not hit this fallback
273 * - Else we are running HV KVM. This means we only support page
274 * sizes that fit in the backing store. Additionally we only
275 * advertise 64K pages if the processor is ARCH 2.06 and we assume
276 * P7 encodings for the SLB and hash table. Here too, we assume
277 * support for any newer processor will mean a kernel that
278 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
281 if (kvmppc_is_pr(cs->kvm_state)) {
286 /* Standard 4k base page size segment */
287 info->sps[0].page_shift = 12;
288 info->sps[0].slb_enc = 0;
289 info->sps[0].enc[0].page_shift = 12;
290 info->sps[0].enc[0].pte_enc = 0;
292 /* Standard 16M large page size segment */
293 info->sps[1].page_shift = 24;
294 info->sps[1].slb_enc = SLB_VSID_L;
295 info->sps[1].enc[0].page_shift = 24;
296 info->sps[1].enc[0].pte_enc = 0;
300 /* HV KVM has backing store size restrictions */
301 info->flags = KVM_PPC_PAGE_SIZES_REAL;
303 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
304 info->flags |= KVM_PPC_1T_SEGMENTS;
307 if (env->mmu_model == POWERPC_MMU_2_06 ||
308 env->mmu_model == POWERPC_MMU_2_07) {
314 /* Standard 4k base page size segment */
315 info->sps[i].page_shift = 12;
316 info->sps[i].slb_enc = 0;
317 info->sps[i].enc[0].page_shift = 12;
318 info->sps[i].enc[0].pte_enc = 0;
321 /* 64K on MMU 2.06 and later */
322 if (env->mmu_model == POWERPC_MMU_2_06 ||
323 env->mmu_model == POWERPC_MMU_2_07) {
324 info->sps[i].page_shift = 16;
325 info->sps[i].slb_enc = 0x110;
326 info->sps[i].enc[0].page_shift = 16;
327 info->sps[i].enc[0].pte_enc = 1;
331 /* Standard 16M large page size segment */
332 info->sps[i].page_shift = 24;
333 info->sps[i].slb_enc = SLB_VSID_L;
334 info->sps[i].enc[0].page_shift = 24;
335 info->sps[i].enc[0].pte_enc = 0;
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
341 CPUState *cs = CPU(cpu);
344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
351 kvm_get_fallback_smmu_info(cpu, info);
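/* Query KVM for the radix MMU page-size encodings (KVM_PPC_GET_RMMU_INFO)
 * so they can be advertised to the guest. */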
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
356 KVMState *s = KVM_STATE(current_machine->accelerator);
357 struct ppc_radix_page_info *radix_page_info;
358 struct kvm_ppc_rmmu_info rmmu_info;
361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
367 radix_page_info = g_malloc0(sizeof(*radix_page_info));
368 radix_page_info->count = 0;
369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370 if (rmmu_info.ap_encodings[i]) {
371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372 radix_page_info->count++;
375 return radix_page_info;
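/* Ask KVM to configure the guest's ISA v3.00 (POWER9) MMU mode: radix vs.
 * hash translation, guest translation shootdown enable (GTSE), and the
 * location of the guest's process table. */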
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379 bool radix, bool gtse,
382 CPUState *cs = CPU(cpu);
385 struct kvm_ppc_mmuv3_cfg cfg = {
386 .process_table = proc_tbl,
390 flags |= KVM_PPC_MMUV3_RADIX;
393 flags |= KVM_PPC_MMUV3_GTSE;
396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
403 return H_NOT_AVAILABLE;
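/* KVM_PPC_PAGE_SIZES_REAL is reported by HV KVM, where the HPT maps guest
 * pages directly onto host pages, so large guest pages must be backed by
 * contiguous host pages of at least that size. */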
409 bool kvmppc_hpt_needs_host_contiguous_pages(void)
411 PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
412 static struct kvm_ppc_smmu_info smmu_info;
414 if (!kvm_enabled()) {
418 kvm_get_smmu_info(cpu, &smmu_info);
419 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
422 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
424 struct kvm_ppc_smmu_info smmu_info;
427 /* For now, we only have anything to check on hash64 MMUs */
428 if (!cpu->hash64_opts || !kvm_enabled()) {
432 kvm_get_smmu_info(cpu, &smmu_info);
434 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
435 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
437 "KVM does not support 1TiB segments which guest expects");
441 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
442 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
443 smmu_info.slb_size, cpu->hash64_opts->slb_size);
448 * Verify that every pagesize supported by the cpu model is
449 * supported by KVM with the same encodings
451 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
452 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
453 struct kvm_ppc_one_seg_page_size *ksps;
455 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
456 if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
460 if (ik >= ARRAY_SIZE(smmu_info.sps)) {
461 error_setg(errp, "KVM doesn't support base page shift %u",
466 ksps = &smmu_info.sps[ik];
467 if (ksps->slb_enc != qsps->slb_enc) {
469 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
470 ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
474 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
475 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
476 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
481 if (jk >= ARRAY_SIZE(ksps->enc)) {
482 error_setg(errp, "KVM doesn't support page shift %u/%u",
483 qsps->enc[jq].page_shift, qsps->page_shift);
486 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
488 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
489 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
490 qsps->page_shift, qsps->enc[jq].pte_enc);
496 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
497 /* Mostly what guest pagesizes we can use are related to the
498 * host pages used to map guest RAM, which is handled in the
499 * platform code. Cache-Inhibited largepages (64k) however are
500 * used for I/O, so if they're mapped to the host at all it
501 * will be a normal mapping, not a special hugepage one used
502 * for RAM. */
503 if (getpagesize() < 0x10000) {
505 "KVM can't supply 64kiB CI pages, which guest expects");
509 #endif /* defined (TARGET_PPC64) */
511 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
513 return POWERPC_CPU(cpu)->vcpu_id;
516 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
517 * book3s supports only 1 watchpoint, so array size
518 * of 4 is sufficient for now.
520 #define MAX_HW_BKPTS 4
522 static struct HWBreakpoint {
525 } hw_debug_points[MAX_HW_BKPTS];
527 static CPUWatchpoint hw_watchpoint;
529 /* By default no breakpoints or watchpoints are supported */
530 static int max_hw_breakpoint;
531 static int max_hw_watchpoint;
532 static int nb_hw_breakpoint;
533 static int nb_hw_watchpoint;
535 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
537 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
538 max_hw_breakpoint = 2;
539 max_hw_watchpoint = 2;
542 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
543 fprintf(stderr, "Error initializing h/w breakpoints\n");
548 int kvm_arch_init_vcpu(CPUState *cs)
550 PowerPCCPU *cpu = POWERPC_CPU(cs);
551 CPUPPCState *cenv = &cpu->env;
554 /* Synchronize sregs with kvm */
555 ret = kvm_arch_sync_sregs(cpu);
557 if (ret == -EINVAL) {
558 error_report("Register sync failed... If you're using kvm-hv.ko,"
559 " only \"-cpu host\" is possible");
564 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
566 switch (cenv->mmu_model) {
567 case POWERPC_MMU_BOOKE206:
568 /* This target supports access to KVM's guest TLB */
569 ret = kvm_booke206_tlb_init(cpu);
571 case POWERPC_MMU_2_07:
572 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
573 /* KVM-HV has transactional memory on POWER8 even without the
574 * KVM_CAP_PPC_HTM extension, so enable it here instead as
575 * long as it's available to userspace on the host. */
576 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
585 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
586 kvmppc_hw_debug_points_init(cenv);
591 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
593 CPUPPCState *env = &cpu->env;
594 CPUState *cs = CPU(cpu);
595 struct kvm_dirty_tlb dirty_tlb;
596 unsigned char *bitmap;
599 if (!env->kvm_sw_tlb) {
603 bitmap = g_malloc((env->nb_tlb + 7) / 8);
604 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
606 dirty_tlb.bitmap = (uintptr_t)bitmap;
607 dirty_tlb.num_dirty = env->nb_tlb;
609 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
611 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
612 __func__, strerror(-ret));
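/* Read a single SPR from KVM via the ONE_REG interface, honouring the
 * register size encoded in the ONE_REG id. */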
618 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
620 PowerPCCPU *cpu = POWERPC_CPU(cs);
621 CPUPPCState *env = &cpu->env;
626 struct kvm_one_reg reg = {
628 .addr = (uintptr_t) &val,
632 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
634 trace_kvm_failed_spr_get(spr, strerror(errno));
636 switch (id & KVM_REG_SIZE_MASK) {
637 case KVM_REG_SIZE_U32:
638 env->spr[spr] = val.u32;
641 case KVM_REG_SIZE_U64:
642 env->spr[spr] = val.u64;
646 /* Don't handle this size yet */
652 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
654 PowerPCCPU *cpu = POWERPC_CPU(cs);
655 CPUPPCState *env = &cpu->env;
660 struct kvm_one_reg reg = {
662 .addr = (uintptr_t) &val,
666 switch (id & KVM_REG_SIZE_MASK) {
667 case KVM_REG_SIZE_U32:
668 val.u32 = env->spr[spr];
671 case KVM_REG_SIZE_U64:
672 val.u64 = env->spr[spr];
676 /* Don't handle this size yet */
680 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
682 trace_kvm_failed_spr_set(spr, strerror(errno));
686 static int kvm_put_fp(CPUState *cs)
688 PowerPCCPU *cpu = POWERPC_CPU(cs);
689 CPUPPCState *env = &cpu->env;
690 struct kvm_one_reg reg;
694 if (env->insns_flags & PPC_FLOAT) {
695 uint64_t fpscr = env->fpscr;
696 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
698 reg.id = KVM_REG_PPC_FPSCR;
699 reg.addr = (uintptr_t)&fpscr;
700 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
702 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
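/* Each FPR/VSR is transferred as a 128-bit image; the scalar FP value
 * goes in the doubleword selected by host endianness below. */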
706 for (i = 0; i < 32; i++) {
709 #ifdef HOST_WORDS_BIGENDIAN
710 vsr[0] = float64_val(env->fpr[i]);
711 vsr[1] = env->vsr[i];
713 vsr[0] = env->vsr[i];
714 vsr[1] = float64_val(env->fpr[i]);
716 reg.addr = (uintptr_t) &vsr;
717 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
719 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
721 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
728 if (env->insns_flags & PPC_ALTIVEC) {
729 reg.id = KVM_REG_PPC_VSCR;
730 reg.addr = (uintptr_t)&env->vscr;
731 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
733 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
737 for (i = 0; i < 32; i++) {
738 reg.id = KVM_REG_PPC_VR(i);
739 reg.addr = (uintptr_t)&env->avr[i];
740 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
742 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
751 static int kvm_get_fp(CPUState *cs)
753 PowerPCCPU *cpu = POWERPC_CPU(cs);
754 CPUPPCState *env = &cpu->env;
755 struct kvm_one_reg reg;
759 if (env->insns_flags & PPC_FLOAT) {
761 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
763 reg.id = KVM_REG_PPC_FPSCR;
764 reg.addr = (uintptr_t)&fpscr;
765 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
767 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
773 for (i = 0; i < 32; i++) {
776 reg.addr = (uintptr_t) &vsr;
777 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
779 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
781 DPRINTF("Unable to get %s%d from KVM: %s\n",
782 vsx ? "VSR" : "FPR", i, strerror(errno));
785 #ifdef HOST_WORDS_BIGENDIAN
786 env->fpr[i] = vsr[0];
788 env->vsr[i] = vsr[1];
791 env->fpr[i] = vsr[1];
793 env->vsr[i] = vsr[0];
800 if (env->insns_flags & PPC_ALTIVEC) {
801 reg.id = KVM_REG_PPC_VSCR;
802 reg.addr = (uintptr_t)&env->vscr;
803 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
805 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
809 for (i = 0; i < 32; i++) {
810 reg.id = KVM_REG_PPC_VR(i);
811 reg.addr = (uintptr_t)&env->avr[i];
812 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
814 DPRINTF("Unable to get VR%d from KVM: %s\n",
824 #if defined(TARGET_PPC64)
825 static int kvm_get_vpa(CPUState *cs)
827 PowerPCCPU *cpu = POWERPC_CPU(cs);
828 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
829 struct kvm_one_reg reg;
832 reg.id = KVM_REG_PPC_VPA_ADDR;
833 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
834 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
836 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
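/* KVM transfers the SLB shadow and DTL as (address, size) pairs packed
 * into a single ONE_REG value, hence the layout asserts below. */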
840 assert((uintptr_t)&spapr_cpu->slb_shadow_size
841 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
842 reg.id = KVM_REG_PPC_VPA_SLB;
843 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
844 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
846 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
851 assert((uintptr_t)&spapr_cpu->dtl_size
852 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
853 reg.id = KVM_REG_PPC_VPA_DTL;
854 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
855 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
857 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
865 static int kvm_put_vpa(CPUState *cs)
867 PowerPCCPU *cpu = POWERPC_CPU(cs);
868 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
869 struct kvm_one_reg reg;
872 /* SLB shadow or DTL can't be registered unless a master VPA is
873 * registered. That means when restoring state, if a VPA *is*
874 * registered, we need to set that up first. If not, we need to
875 * deregister the others before deregistering the master VPA */
876 assert(spapr_cpu->vpa_addr
877 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
879 if (spapr_cpu->vpa_addr) {
880 reg.id = KVM_REG_PPC_VPA_ADDR;
881 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
882 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
884 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
889 assert((uintptr_t)&spapr_cpu->slb_shadow_size
890 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
891 reg.id = KVM_REG_PPC_VPA_SLB;
892 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
893 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
895 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
899 assert((uintptr_t)&spapr_cpu->dtl_size
900 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
901 reg.id = KVM_REG_PPC_VPA_DTL;
902 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
903 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
905 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
910 if (!spapr_cpu->vpa_addr) {
911 reg.id = KVM_REG_PPC_VPA_ADDR;
912 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
913 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
915 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
922 #endif /* TARGET_PPC64 */
924 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
926 CPUPPCState *env = &cpu->env;
927 struct kvm_sregs sregs;
930 sregs.pvr = env->spr[SPR_PVR];
933 PPCVirtualHypervisorClass *vhc =
934 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
935 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
937 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
942 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
943 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
944 if (env->slb[i].esid & SLB_ESID_V) {
945 sregs.u.s.ppc64.slb[i].slbe |= i;
947 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
952 for (i = 0; i < 16; i++) {
953 sregs.u.s.ppc32.sr[i] = env->sr[i];
957 for (i = 0; i < 8; i++) {
958 /* Beware. We have to swap upper and lower bits here */
959 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
961 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
965 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
968 int kvm_arch_put_registers(CPUState *cs, int level)
970 PowerPCCPU *cpu = POWERPC_CPU(cs);
971 CPUPPCState *env = &cpu->env;
972 struct kvm_regs regs;
976 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
983 regs.xer = cpu_read_xer(env);
987 regs.srr0 = env->spr[SPR_SRR0];
988 regs.srr1 = env->spr[SPR_SRR1];
990 regs.sprg0 = env->spr[SPR_SPRG0];
991 regs.sprg1 = env->spr[SPR_SPRG1];
992 regs.sprg2 = env->spr[SPR_SPRG2];
993 regs.sprg3 = env->spr[SPR_SPRG3];
994 regs.sprg4 = env->spr[SPR_SPRG4];
995 regs.sprg5 = env->spr[SPR_SPRG5];
996 regs.sprg6 = env->spr[SPR_SPRG6];
997 regs.sprg7 = env->spr[SPR_SPRG7];
999 regs.pid = env->spr[SPR_BOOKE_PID];
1001 for (i = 0; i < 32; i++)
1002 regs.gpr[i] = env->gpr[i];
1005 for (i = 0; i < 8; i++) {
1006 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1009 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1015 if (env->tlb_dirty) {
1016 kvm_sw_tlb_put(cpu);
1017 env->tlb_dirty = false;
1020 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1021 ret = kvmppc_put_books_sregs(cpu);
1027 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1028 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1034 /* We deliberately ignore errors here: for kernels which have
1035 * the ONE_REG calls but don't support the specific
1036 * registers, there's a reasonable chance things will still
1037 * work, at least until we try to migrate. */
1038 for (i = 0; i < 1024; i++) {
1039 uint64_t id = env->spr_cb[i].one_reg_id;
1042 kvm_put_one_spr(cs, id, i);
1048 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1049 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1051 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1054 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1067 if (kvm_put_vpa(cs) < 0) {
1068 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1072 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1073 #endif /* TARGET_PPC64 */
1079 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1081 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1084 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1086 CPUPPCState *env = &cpu->env;
1087 struct kvm_sregs sregs;
1090 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1095 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1096 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1097 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1098 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1099 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1100 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1101 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1102 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1103 env->spr[SPR_DECR] = sregs.u.e.dec;
1104 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1105 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1106 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1109 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1110 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1111 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1112 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1113 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1114 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1117 if (sregs.u.e.features & KVM_SREGS_E_64) {
1118 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1121 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1122 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1125 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1126 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1127 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1128 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1129 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1130 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1131 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1132 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1133 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1134 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1135 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1136 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1137 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1138 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1139 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1140 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1141 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1142 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1143 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1144 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1145 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1146 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1147 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1148 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1149 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1150 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1151 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1152 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1153 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1154 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1155 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1156 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1157 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1159 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1160 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1161 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1162 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1163 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1164 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1165 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1168 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1169 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1170 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1173 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1174 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1175 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1176 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1177 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1181 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1182 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1183 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1184 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1185 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1186 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1187 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1188 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1189 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1190 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1191 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1194 if (sregs.u.e.features & KVM_SREGS_EXP) {
1195 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1198 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1199 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1200 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1203 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1204 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1205 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1206 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1208 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1209 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1210 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1217 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1219 CPUPPCState *env = &cpu->env;
1220 struct kvm_sregs sregs;
1224 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1230 ppc_store_sdr1(env, sregs.u.s.sdr1);
1236 * The packed SLB array we get from KVM_GET_SREGS only contains
1237 * information about valid entries. So we flush our internal copy
1238 * to get rid of stale ones, then put all valid SLB entries back
1241 memset(env->slb, 0, sizeof(env->slb));
1242 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1243 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1244 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1246 * Only restore valid entries
1248 if (rb & SLB_ESID_V) {
1249 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1255 for (i = 0; i < 16; i++) {
1256 env->sr[i] = sregs.u.s.ppc32.sr[i];
1260 for (i = 0; i < 8; i++) {
1261 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1262 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1263 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1264 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1270 int kvm_arch_get_registers(CPUState *cs)
1272 PowerPCCPU *cpu = POWERPC_CPU(cs);
1273 CPUPPCState *env = &cpu->env;
1274 struct kvm_regs regs;
1278 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1283 for (i = 7; i >= 0; i--) {
1284 env->crf[i] = cr & 15;
1288 env->ctr = regs.ctr;
1290 cpu_write_xer(env, regs.xer);
1291 env->msr = regs.msr;
1294 env->spr[SPR_SRR0] = regs.srr0;
1295 env->spr[SPR_SRR1] = regs.srr1;
1297 env->spr[SPR_SPRG0] = regs.sprg0;
1298 env->spr[SPR_SPRG1] = regs.sprg1;
1299 env->spr[SPR_SPRG2] = regs.sprg2;
1300 env->spr[SPR_SPRG3] = regs.sprg3;
1301 env->spr[SPR_SPRG4] = regs.sprg4;
1302 env->spr[SPR_SPRG5] = regs.sprg5;
1303 env->spr[SPR_SPRG6] = regs.sprg6;
1304 env->spr[SPR_SPRG7] = regs.sprg7;
1306 env->spr[SPR_BOOKE_PID] = regs.pid;
1308 for (i = 0; i < 32; i++)
1309 env->gpr[i] = regs.gpr[i];
1313 if (cap_booke_sregs) {
1314 ret = kvmppc_get_booke_sregs(cpu);
1321 ret = kvmppc_get_books_sregs(cpu);
1328 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1334 /* We deliberately ignore errors here: for kernels which have
1335 * the ONE_REG calls but don't support the specific
1336 * registers, there's a reasonable chance things will still
1337 * work, at least until we try to migrate. */
1338 for (i = 0; i < 1024; i++) {
1339 uint64_t id = env->spr_cb[i].one_reg_id;
1342 kvm_get_one_spr(cs, id, i);
1348 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1349 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1351 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1367 if (kvm_get_vpa(cs) < 0) {
1368 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1372 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1379 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1381 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1383 if (irq != PPC_INTERRUPT_EXT) {
1387 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1391 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1396 #if defined(TARGET_PPCEMB)
1397 #define PPC_INPUT_INT PPC40x_INPUT_INT
1398 #elif defined(TARGET_PPC64)
1399 #define PPC_INPUT_INT PPC970_INPUT_INT
1401 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1404 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1406 PowerPCCPU *cpu = POWERPC_CPU(cs);
1407 CPUPPCState *env = &cpu->env;
1411 qemu_mutex_lock_iothread();
1413 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1414 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1415 if (!cap_interrupt_level &&
1416 run->ready_for_interrupt_injection &&
1417 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1418 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1420 /* For now KVM disregards the 'irq' argument. However, in the
1421 * future KVM could cache it in-kernel to avoid a heavyweight exit
1422 * when reading the UIC.
1424 irq = KVM_INTERRUPT_SET;
1426 DPRINTF("injected interrupt %d\n", irq);
1427 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1429 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1432 /* Always wake up soon in case the interrupt was level based */
1433 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1434 (NANOSECONDS_PER_SECOND / 50));
1437 /* We don't know if there are more interrupts pending after this. However,
1438 * the guest will return to userspace in the course of handling this one
1439 * anyways, so we will get a chance to deliver the rest. */
1441 qemu_mutex_unlock_iothread();
1444 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1446 return MEMTXATTRS_UNSPECIFIED;
1449 int kvm_arch_process_async_events(CPUState *cs)
1454 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1456 CPUState *cs = CPU(cpu);
1457 CPUPPCState *env = &cpu->env;
1459 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1461 cs->exception_index = EXCP_HLT;
1467 /* map dcr access to existing qemu dcr emulation */
1468 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1470 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1471 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1476 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1478 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1479 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1484 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1486 /* Mixed endian case is not handled */
1487 uint32_t sc = debug_inst_opcode;
1489 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1491 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1498 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1502 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1503 sc != debug_inst_opcode ||
1504 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1512 static int find_hw_breakpoint(target_ulong addr, int type)
1516 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1517 <= ARRAY_SIZE(hw_debug_points));
1519 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1520 if (hw_debug_points[n].addr == addr &&
1521 hw_debug_points[n].type == type) {
1529 static int find_hw_watchpoint(target_ulong addr, int *flag)
1533 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1535 *flag = BP_MEM_ACCESS;
1539 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1541 *flag = BP_MEM_WRITE;
1545 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1547 *flag = BP_MEM_READ;
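/* New hardware breakpoints and watchpoints are appended to
 * hw_debug_points[] in insertion order; the combined count is
 * nb_hw_breakpoint + nb_hw_watchpoint. */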
1554 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1555 target_ulong len, int type)
1557 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1561 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1562 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1565 case GDB_BREAKPOINT_HW:
1566 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1570 if (find_hw_breakpoint(addr, type) >= 0) {
1577 case GDB_WATCHPOINT_WRITE:
1578 case GDB_WATCHPOINT_READ:
1579 case GDB_WATCHPOINT_ACCESS:
1580 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1584 if (find_hw_breakpoint(addr, type) >= 0) {
1598 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1599 target_ulong len, int type)
1603 n = find_hw_breakpoint(addr, type);
1609 case GDB_BREAKPOINT_HW:
1613 case GDB_WATCHPOINT_WRITE:
1614 case GDB_WATCHPOINT_READ:
1615 case GDB_WATCHPOINT_ACCESS:
1622 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1627 void kvm_arch_remove_all_hw_breakpoints(void)
1629 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1632 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1636 /* Software Breakpoint updates */
1637 if (kvm_sw_breakpoints_active(cs)) {
1638 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1641 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1642 <= ARRAY_SIZE(hw_debug_points));
1643 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1645 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1646 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1647 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1648 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1649 switch (hw_debug_points[n].type) {
1650 case GDB_BREAKPOINT_HW:
1651 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1653 case GDB_WATCHPOINT_WRITE:
1654 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1656 case GDB_WATCHPOINT_READ:
1657 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1659 case GDB_WATCHPOINT_ACCESS:
1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1661 KVMPPC_DEBUG_WATCH_READ;
1664 cpu_abort(cs, "Unsupported breakpoint type\n");
1666 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1671 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1673 CPUState *cs = CPU(cpu);
1674 CPUPPCState *env = &cpu->env;
1675 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1680 if (cs->singlestep_enabled) {
1682 } else if (arch_info->status) {
1683 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1684 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1685 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1689 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1690 KVMPPC_DEBUG_WATCH_WRITE)) {
1691 n = find_hw_watchpoint(arch_info->address, &flag);
1694 cs->watchpoint_hit = &hw_watchpoint;
1695 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1696 hw_watchpoint.flags = flag;
1700 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1703 /* QEMU is not able to handle the debug exception, so inject
1704 * a program exception into the guest;
1705 * yes, a program exception, NOT a debug exception!
1706 * When QEMU is using the debug resources then the debug exception
1707 * must always be set. To achieve this we set MSR_DE and also set
1708 * MSRP_DEP so the guest cannot change MSR_DE.
1709 * When emulating debug resources for the guest we want the guest
1710 * to control MSR_DE (enable/disable the debug interrupt on demand).
1711 * Supporting both configurations is NOT possible.
1712 * So the result is that we cannot share debug resources
1713 * between QEMU and the guest on BookE architecture.
1714 * In the current design QEMU gets priority over the guest,
1715 * which means that if QEMU is using the debug resources then the
1716 * guest cannot use them.
1717 * For software breakpoints QEMU uses a privileged instruction,
1718 * so there cannot be any reason that we are here because the guest
1719 * set a debug exception; the only possibility is that the guest
1720 * executed a privileged / illegal instruction and that's why we are
1721 * injecting a program interrupt.
1724 cpu_synchronize_state(cs);
1725 /* env->nip is PC, so increment this by 4 to use
1726 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1729 cs->exception_index = POWERPC_EXCP_PROGRAM;
1730 env->error_code = POWERPC_EXCP_INVAL;
1731 ppc_cpu_do_interrupt(cs);
1737 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1739 PowerPCCPU *cpu = POWERPC_CPU(cs);
1740 CPUPPCState *env = &cpu->env;
1743 qemu_mutex_lock_iothread();
1745 switch (run->exit_reason) {
1747 if (run->dcr.is_write) {
1748 DPRINTF("handle dcr write\n");
1749 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1751 DPRINTF("handle dcr read\n");
1752 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1756 DPRINTF("handle halt\n");
1757 ret = kvmppc_handle_halt(cpu);
1759 #if defined(TARGET_PPC64)
1760 case KVM_EXIT_PAPR_HCALL:
1761 DPRINTF("handle PAPR hypercall\n");
1762 run->papr_hcall.ret = spapr_hypercall(cpu,
1764 run->papr_hcall.args);
1769 DPRINTF("handle epr\n");
1770 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1773 case KVM_EXIT_WATCHDOG:
1774 DPRINTF("handle watchdog expiry\n");
1775 watchdog_perform_action();
1779 case KVM_EXIT_DEBUG:
1780 DPRINTF("handle debug exception\n");
1781 if (kvm_handle_debug(cpu, run)) {
1785 /* re-enter, this exception was guest-internal */
1790 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1795 qemu_mutex_unlock_iothread();
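/* BookE watchdog support: TSR bits are set/cleared and TCR is programmed
 * through dedicated ONE_REG registers rather than the SREGS interface. */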
1799 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1801 CPUState *cs = CPU(cpu);
1802 uint32_t bits = tsr_bits;
1803 struct kvm_one_reg reg = {
1804 .id = KVM_REG_PPC_OR_TSR,
1805 .addr = (uintptr_t) &bits,
1808 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1811 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1814 CPUState *cs = CPU(cpu);
1815 uint32_t bits = tsr_bits;
1816 struct kvm_one_reg reg = {
1817 .id = KVM_REG_PPC_CLEAR_TSR,
1818 .addr = (uintptr_t) &bits,
1821 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1824 int kvmppc_set_tcr(PowerPCCPU *cpu)
1826 CPUState *cs = CPU(cpu);
1827 CPUPPCState *env = &cpu->env;
1828 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1830 struct kvm_one_reg reg = {
1831 .id = KVM_REG_PPC_TCR,
1832 .addr = (uintptr_t) &tcr,
1835 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1838 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1840 CPUState *cs = CPU(cpu);
1843 if (!kvm_enabled()) {
1847 if (!cap_ppc_watchdog) {
1848 printf("warning: KVM does not support watchdog\n");
1852 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1854 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1855 __func__, strerror(-ret));
1862 static int read_cpuinfo(const char *field, char *value, int len)
1866 int field_len = strlen(field);
1869 f = fopen("/proc/cpuinfo", "r");
1875 if (!fgets(line, sizeof(line), f)) {
1878 if (!strncmp(line, field, field_len)) {
1879 pstrcpy(value, len, line);
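/* Parse the host timebase frequency out of /proc/cpuinfo; fall back to
 * NANOSECONDS_PER_SECOND (1 GHz) if it cannot be determined. */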
1890 uint32_t kvmppc_get_tbfreq(void)
1894 uint32_t retval = NANOSECONDS_PER_SECOND;
1896 if (read_cpuinfo("timebase", line, sizeof(line))) {
1900 if (!(ns = strchr(line, ':'))) {
1909 bool kvmppc_get_host_serial(char **value)
1911 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1915 bool kvmppc_get_host_model(char **value)
1917 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1920 /* Try to find a device tree node for a CPU with clock-frequency property */
1921 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1923 struct dirent *dirp;
1926 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1927 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1932 while ((dirp = readdir(dp)) != NULL) {
1934 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1936 f = fopen(buf, "r");
1938 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1945 if (buf[0] == '\0') {
1946 printf("Unknown host!\n");
1953 static uint64_t kvmppc_read_int_dt(const char *filename)
1962 f = fopen(filename, "rb");
1967 len = fread(&u, 1, sizeof(u), f);
1971 /* property is a 32-bit quantity */
1972 return be32_to_cpu(u.v32);
1974 return be64_to_cpu(u.v64);
1980 /* Read a CPU node property from the host device tree that's a single
1981 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1982 * (can't find or open the property, or doesn't understand the
1983 * format). */
1984 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1986 char buf[PATH_MAX], *tmp;
1989 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1993 tmp = g_strdup_printf("%s/%s", buf, propname);
1994 val = kvmppc_read_int_dt(tmp);
2000 uint64_t kvmppc_get_clockfreq(void)
2002 return kvmppc_read_int_cpu_dt("clock-frequency");
2005 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2007 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2008 CPUState *cs = CPU(cpu);
2010 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2011 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2018 int kvmppc_get_hasidle(CPUPPCState *env)
2020 struct kvm_ppc_pvinfo pvinfo;
2022 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2023 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2030 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2032 uint32_t *hc = (uint32_t*)buf;
2033 struct kvm_ppc_pvinfo pvinfo;
2035 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2036 memcpy(buf, pvinfo.hcall, buf_len);
2041 * Fallback to always fail hypercalls regardless of endianness:
2043 tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2044 li r3, -1
2045 b .+8 (becomes nop in wrong endian)
2046 bswap32(li r3, -1)
2047 */
2049 hc[0] = cpu_to_be32(0x08000048);
2050 hc[1] = cpu_to_be32(0x3860ffff);
2051 hc[2] = cpu_to_be32(0x48000008);
2052 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2057 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2059 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2062 void kvmppc_enable_logical_ci_hcalls(void)
2065 * FIXME: it would be nice if we could detect the cases where
2066 * we're using a device which requires the in kernel
2067 * implementation of these hcalls, but the kernel lacks them and
2068 * produce a warning.
2070 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2071 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2074 void kvmppc_enable_set_mode_hcall(void)
2076 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2079 void kvmppc_enable_clear_ref_mod_hcalls(void)
2081 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2082 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2085 void kvmppc_set_papr(PowerPCCPU *cpu)
2087 CPUState *cs = CPU(cpu);
2090 if (!kvm_enabled()) {
2094 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2096 error_report("This vCPU type or KVM version does not support PAPR");
2100 /* Update the capability flag so we sync the right information
2101 * with kvm */
2102 cap_papr = 1;
2105 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2107 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2110 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2112 CPUState *cs = CPU(cpu);
2115 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2116 if (ret && mpic_proxy) {
2117 error_report("This KVM version does not support EPR");
2122 int kvmppc_smt_threads(void)
2124 return cap_ppc_smt ? cap_ppc_smt : 1;
2127 int kvmppc_set_smt_threads(int smt)
2131 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2138 void kvmppc_hint_smt_possible(Error **errp)
2144 assert(kvm_enabled());
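/* cap_ppc_smt_possible is a bitmap of the SMT modes the host kernel can
 * virtualise; list them to help the user pick a valid VSMT value. */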
2145 if (cap_ppc_smt_possible) {
2146 g = g_string_new("Available VSMT modes:");
2147 for (i = 63; i >= 0; i--) {
2148 if ((1UL << i) & cap_ppc_smt_possible) {
2149 g_string_append_printf(g, " %lu", (1UL << i));
2152 s = g_string_free(g, false);
2153 error_append_hint(errp, "%s.\n", s);
2156 error_append_hint(errp,
2157 "This KVM seems to be too old to support VSMT.\n");
2163 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2165 struct kvm_ppc_smmu_info info;
2166 long rampagesize, best_page_shift;
2169 /* Find the largest hardware supported page size that's less than
2170 * or equal to the (logical) backing page size of guest RAM */
2171 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2172 rampagesize = qemu_getrampagesize();
2173 best_page_shift = 0;
2175 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2176 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2178 if (!sps->page_shift) {
2182 if ((sps->page_shift > best_page_shift)
2183 && ((1UL << sps->page_shift) <= rampagesize)) {
2184 best_page_shift = sps->page_shift;
2188 return MIN(current_size,
2189 1ULL << (best_page_shift + hash_shift - 7));
2193 bool kvmppc_spapr_use_multitce(void)
2195 return cap_spapr_multitce;
2198 int kvmppc_spapr_enable_inkernel_multitce(void)
2202 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2203 H_PUT_TCE_INDIRECT, 1);
2205 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2212 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2213 uint64_t bus_offset, uint32_t nb_table,
2214 int *pfd, bool need_vfio)
2220 /* Must set fd to -1 so we don't try to munmap when called for
2221 * destroying the table, which the upper layers -will- do
2222 */
2223 *pfd = -1;
2224 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2228 if (cap_spapr_tce_64) {
2229 struct kvm_create_spapr_tce_64 args = {
2231 .page_shift = page_shift,
2232 .offset = bus_offset >> page_shift,
2236 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2239 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2243 } else if (cap_spapr_tce) {
2244 uint64_t window_size = (uint64_t) nb_table << page_shift;
2245 struct kvm_create_spapr_tce args = {
2247 .window_size = window_size,
2249 if ((window_size != args.window_size) || bus_offset) {
2252 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2254 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2262 len = nb_table * sizeof(uint64_t);
2263 /* FIXME: round this up to page size */
2265 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2266 if (table == MAP_FAILED) {
2267 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2277 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2285 len = nb_table * sizeof(uint64_t);
2286 if ((munmap(table, len) < 0) ||
2288 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2290 /* Leak the table */
2296 int kvmppc_reset_htab(int shift_hint)
2298 uint32_t shift = shift_hint;
2300 if (!kvm_enabled()) {
2301 /* Full emulation, tell caller to allocate htab itself */
2304 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2306 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2307 if (ret == -ENOTTY) {
2308 /* At least some versions of PR KVM advertise the
2309 * capability, but don't implement the ioctl(). Oops.
2310 * Return 0 so that we allocate the htab in qemu, as is
2311 * correct for PR. */
2313 } else if (ret < 0) {
2319 /* We have a kernel that predates the htab reset calls. For PR
2320 * KVM, we need to allocate the htab ourselves; an HV KVM of
2321 * this era has already allocated a 16MB fixed-size hash table. */
2322 if (kvmppc_is_pr(kvm_state)) {
2323 /* PR - tell caller to allocate htab */
2326 /* HV - assume 16MB kernel allocated htab */
2331 static inline uint32_t mfpvr(void)
2340 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2349 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2351 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2352 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2353 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2355 /* Now fix up the class with information we can query from the host */
2358 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2359 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2360 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2361 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2362 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2363 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2365 if (dcache_size != -1) {
2366 pcc->l1_dcache_size = dcache_size;
2369 if (icache_size != -1) {
2370 pcc->l1_icache_size = icache_size;
2373 #if defined(TARGET_PPC64)
2374 pcc->radix_page_info = kvm_get_radix_page_info();
2376 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2378 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2379 * compliant. More importantly, advertising ISA 3.00
2380 * architected mode may prevent guests from activating
2381 * necessary DD1 workarounds.
2383 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2384 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2386 #endif /* defined(TARGET_PPC64) */
2389 bool kvmppc_has_cap_epr(void)
2394 bool kvmppc_has_cap_fixup_hcalls(void)
2396 return cap_fixup_hcalls;
2399 bool kvmppc_has_cap_htm(void)
2404 bool kvmppc_has_cap_mmu_radix(void)
2406 return cap_mmu_radix;
2409 bool kvmppc_has_cap_mmu_hash_v3(void)
2411 return cap_mmu_hash_v3;
2414 static bool kvmppc_power8_host(void)
2419 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2420 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2421 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2422 (base_pvr == CPU_POWERPC_POWER8_BASE);
2424 #endif /* TARGET_PPC64 */
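/* Translate the character/behaviour bits returned by KVM_PPC_GET_CPU_CHAR
 * into the workaround levels used by the spapr security capabilities. */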
2428 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2430 bool l1d_thread_priv_req = !kvmppc_power8_host();
2432 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2434 } else if ((!l1d_thread_priv_req ||
2435 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2436 (c.character & c.character_mask
2437 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2444 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2446 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2448 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2455 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2457 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2458 return SPAPR_CAP_FIXED_CCD;
2459 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2460 return SPAPR_CAP_FIXED_IBS;
static void kvmppc_get_cpu_characteristics(KVMState *s)
{
    struct kvm_ppc_cpu_char c;
    int ret;

    /* Assume broken */
    cap_ppc_safe_cache = 0;
    cap_ppc_safe_bounds_check = 0;
    cap_ppc_safe_indirect_branch = 0;

    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
    if (!ret) {
        return;
    }
    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
    if (ret < 0) {
        return;
    }

    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
}
int kvmppc_get_cap_safe_cache(void)
{
    return cap_ppc_safe_cache;
}

int kvmppc_get_cap_safe_bounds_check(void)
{
    return cap_ppc_safe_bounds_check;
}

int kvmppc_get_cap_safe_indirect_branch(void)
{
    return cap_ppc_safe_indirect_branch;
}

bool kvmppc_has_cap_spapr_vfio(void)
{
    return cap_spapr_vfio;
}
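/*
 * Editorial note: these getters are the read side of
 * kvmppc_get_cpu_characteristics().  sPAPR capability code can consult them
 * when validating user-requested security settings, roughly (hypothetical
 * caller sketch, 'desired_level' is an assumed variable):
 *
 *     int host_level = kvmppc_get_cap_safe_cache();
 *     if (host_level < desired_level) {
 *         // warn or refuse to start with the requested capability level
 *     }
 */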
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}
static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
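/*
 * Usage sketch (illustrative only): interrupt controller code can register
 * tokens for RTAS calls it wants handled in-kernel, along the lines of
 *
 *     kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive");
 *
 * A negative return just means the call keeps being emulated in QEMU; the
 * token value is whatever the machine assigned, not a fixed constant.
 */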
int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = index,
    };
    int ret;

    if (!cap_htab_fd) {
        error_setg(errp, "KVM version doesn't support %s the HPT",
                   write ? "writing" : "reading");
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
    if (ret < 0) {
        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
                   write ? "writing" : "reading", write ? "to" : "from",
                   strerror(errno));
        return -errno;
    }

    return ret;
}
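/*
 * Editorial note: the fd returned here speaks the KVM_PPC_GET_HTAB_FD
 * format consumed and produced by the functions below.  Each read() or
 * write() carries a sequence of chunks of the form
 *
 *     struct kvm_get_htab_header { __u32 index; __u16 n_valid; __u16 n_invalid; }
 *     followed by n_valid * HASH_PTE_SIZE_64 bytes of HPTE data,
 *
 * where 'index' is the first HPTE slot described, the next n_valid slots
 * are present in the payload and the following n_invalid slots are empty.
 */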
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;

            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                     HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
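/*
 * Worked example of the chunk arithmetic above (editorial): with
 * HASH_PTE_SIZE_64 == 16 and an 8-byte kvm_get_htab_header, a chunk with
 * n_valid == 4 occupies sizeof(*head) + 16 * 4 == 8 + 64 == 72 bytes of
 * 'buf'.  Only the three header fields plus the 64 bytes of HPTE data are
 * forwarded into the migration stream; n_invalid slots carry no payload.
 */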
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}
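/*
 * Editorial note: this is the per-chunk inverse of kvmppc_save_htab().  The
 * (index, n_valid, n_invalid) triple comes straight from the migration
 * stream written with qemu_put_be32()/qemu_put_be16() above, and the
 * rebuilt header plus HPTE payload is handed back to KVM through the
 * writable HTAB fd in a single write().
 */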
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
            }

            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}
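/*
 * Usage sketch (illustrative only): callers typically fetch whole PTE
 * groups, e.g. the hash MMU code reading one PTEG worth of entries:
 *
 *     ppc_hash_pte64_t hptes[HPTES_PER_GROUP];
 *     kvmppc_read_hptes(hptes, pteg_index * HPTES_PER_GROUP, HPTES_PER_GROUP);
 *
 * 'pteg_index' is a hypothetical variable naming the group of interest.
 */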
void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }

    close(fd);
}
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}
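/*
 * Editorial note: kvmppc_enable_hcall() (defined earlier in this file)
 * toggles in-kernel handling of a single hypercall via
 * KVM_CAP_PPC_ENABLE_HCALL.  Enabling H_RANDOM here lets the kernel serve
 * guest entropy requests from the host hardware RNG directly instead of
 * exiting to QEMU for every hypercall.
 */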
void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fallback on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}
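/*
 * Usage sketch (illustrative only, hypothetical caller): machine setup code
 * can propagate the failure through the usual QEMU Error pattern:
 *
 *     Error *local_err = NULL;
 *
 *     kvmppc_check_papr_resize_hpt(&local_err);
 *     if (local_err) {
 *         error_propagate(errp, local_err);
 *         return;
 *     }
 */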
int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}
int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}
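/*
 * Editorial note: HPT resizing is a two-phase protocol driven by the
 * guest's H_RESIZE_HPT_PREPARE and H_RESIZE_HPT_COMMIT hypercalls.  A
 * simplified sketch of the expected call pattern (error handling elided):
 *
 *     rc = kvmppc_resize_hpt_prepare(cpu, flags, shift);  // may need to be
 *     rc = kvmppc_resize_hpt_prepare(cpu, flags, shift);  // retried while busy
 *     rc = kvmppc_resize_hpt_commit(cpu, flags, shift);   // switch to new HPT
 *
 * PREPARE asks the kernel to build a new hash table of 2^shift bytes in the
 * background; COMMIT switches the guest over to it once it is ready.
 */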
/*
 * This is a helper function to detect a post migration scenario
 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true. The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We need to avoid as much as possible querying the running KVM type
 * in QEMU level.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}