2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
/* Debug output helper.  Two DPRINTF definitions appear below; presumably
 * these are the debug and no-op arms of an elided #ifdef DEBUG_KVM /
 * #else / #endif block -- TODO confirm against the full file. */
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
/* Second (release) arm; its empty body is elided in this view. */
47 #define DPRINTF(fmt, ...) \
/* Host procfs location of the flattened device tree's CPU nodes. */
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
/* Capabilities the host kernel must provide for PPC KVM to work at all
 * (entries and the KVM_CAP_LAST_INFO terminator are elided in this view). */
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
/* Host KVM capability flags, probed once in kvm_arch_init() and consulted
 * throughout the file.  (cap_hior/cap_epr/cap_papr are assigned in
 * kvm_arch_init() below but their declarations are elided here.) */
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
65 static int cap_one_reg;
67 static int cap_ppc_watchdog;
69 static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can assure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
 */
/* One-shot timer re-armed in kvm_arch_pre_run() to implement the
 * wake-up workaround described above. */
80 static QEMUTimer *idle_timer;
/* idle_timer callback: kick the vCPU out of its halt so any pending
 * (possibly swallowed) interrupt gets re-delivered. */
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
/* Defined later in the file (past this view). */
89 static int kvm_ppc_register_host_cpu_type(void);
/* One-time architecture initialisation: probe the host kernel's PPC KVM
 * capabilities into the cap_* flags above and register a CPU type
 * matching the host CPU. */
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
/* Without level-triggered irq support we rely on the idle_timer
 * re-injection workaround; warn the user up front. */
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
/* Push the guest PVR into KVM's sregs so the kernel models the right CPU.
 * Returns 0 on success or a negative errno from the ioctl. */
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users that they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
/* Read-modify-write: fetch current sregs, patch only the PVR, write back. */
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
/* Configure KVM_CAP_SW_TLB so QEMU's BookE 2.06 MAS-format TLB array
 * (env->tlb.tlbm) is shared directly with the kernel.  Returns 0 on
 * success, non-zero/negative on failure (elided in this view). */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 struct kvm_enable_cap encap = {};
155 unsigned int entries = 0;
158 if (!kvm_enabled() ||
159 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
163 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
/* Describe each TLB's geometry to the kernel and count total entries. */
165 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
166 params.tlb_sizes[i] = booke206_tlb_size(env, i);
167 params.tlb_ways[i] = booke206_tlb_ways(env, i);
168 entries += params.tlb_sizes[i];
171 assert(entries == env->nb_tlb);
172 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
174 env->tlb_dirty = true;
176 cfg.array = (uintptr_t)env->tlb.tlbm;
177 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
/* Fixed mojibake: "&params" had been corrupted to the HTML entity "¶". */
178 cfg.params = (uintptr_t)&params;
179 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
181 encap.cap = KVM_CAP_SW_TLB;
182 encap.args[0] = (uintptr_t)&cfg;
184 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
186 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
187 __func__, strerror(-ret));
191 env->kvm_sw_tlb = true;
196 #if defined(TARGET_PPC64)
/* Synthesize an SMMU info structure for old kernels lacking the
 * KVM_PPC_GET_SMMU_INFO ioctl; fills *info with a conservative guess. */
197 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
198 struct kvm_ppc_smmu_info *info)
200 CPUPPCState *env = &cpu->env;
201 CPUState *cs = CPU(cpu);
203 memset(info, 0, sizeof(*info));
205 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
206 * need to "guess" what the supported page sizes are.
208 * For that to work we make a few assumptions:
210 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
211 * KVM which only supports 4K and 16M pages, but supports them
212 * regardless of the backing store characteristics. We also don't
213 * support 1T segments.
215 * This is safe as if HV KVM ever supports that capability or PR
216 * KVM grows support for more page/segment sizes, those versions
217 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
218 * will not hit this fallback
220 * - Else we are running HV KVM. This means we only support page
221 * sizes that fit in the backing store. Additionally we only
222 * advertise 64K pages if the processor is ARCH 2.06 and we assume
223 * P7 encodings for the SLB and hash table. Here too, we assume
224 * support for any newer processor will mean a kernel that
225 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
228 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
/* PR KVM branch: fixed 4k + 16M page sizes. */
233 /* Standard 4k base page size segment */
234 info->sps[0].page_shift = 12;
235 info->sps[0].slb_enc = 0;
236 info->sps[0].enc[0].page_shift = 12;
237 info->sps[0].enc[0].pte_enc = 0;
239 /* Standard 16M large page size segment */
240 info->sps[1].page_shift = 24;
241 info->sps[1].slb_enc = SLB_VSID_L;
242 info->sps[1].enc[0].page_shift = 24;
243 info->sps[1].enc[0].pte_enc = 0;
/* HV KVM branch below (else arm; brace elided in this view). */
247 /* HV KVM has backing store size restrictions */
248 info->flags = KVM_PPC_PAGE_SIZES_REAL;
250 if (env->mmu_model & POWERPC_MMU_1TSEG) {
251 info->flags |= KVM_PPC_1T_SEGMENTS;
254 if (env->mmu_model == POWERPC_MMU_2_06) {
260 /* Standard 4k base page size segment */
261 info->sps[i].page_shift = 12;
262 info->sps[i].slb_enc = 0;
263 info->sps[i].enc[0].page_shift = 12;
264 info->sps[i].enc[0].pte_enc = 0;
267 /* 64K on MMU 2.06 */
268 if (env->mmu_model == POWERPC_MMU_2_06) {
269 info->sps[i].page_shift = 16;
270 info->sps[i].slb_enc = 0x110;
271 info->sps[i].enc[0].page_shift = 16;
272 info->sps[i].enc[0].pte_enc = 1;
276 /* Standard 16M large page size segment */
277 info->sps[i].page_shift = 24;
278 info->sps[i].slb_enc = SLB_VSID_L;
279 info->sps[i].enc[0].page_shift = 24;
280 info->sps[i].enc[0].pte_enc = 0;
/* Fetch SMMU info from the kernel if the ioctl exists, otherwise fall
 * back to the synthesized guess above. */
284 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
286 CPUState *cs = CPU(cpu);
289 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
290 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
296 kvm_get_fallback_smmu_info(cpu, info);
/* Return the page size backing guest RAM: the normal host page size for
 * anonymous memory, or the hugepage size when -mem-path points at a
 * hugetlbfs mount. */
299 static long getrampagesize(void)
305 /* guest RAM is backed by normal anonymous pages */
306 return getpagesize();
/* Retry statfs() across EINTR. */
310 ret = statfs(mem_path, &fs);
311 } while (ret != 0 && errno == EINTR);
314 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
319 #define HUGETLBFS_MAGIC 0x958458f6
321 if (fs.f_type != HUGETLBFS_MAGIC) {
322 /* Explicit mempath, but it's ordinary pages */
323 return getpagesize();
326 /* It's hugepage, return the huge page size */
/* A page size is usable if KVM imposes no real-page restriction, or the
 * page (1 << shift) fits within the RAM backing page size. */
330 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
332 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
336 return (1ul << shift) <= rampgsize;
/* Filter the kernel-reported segment/page sizes against the RAM backing
 * page size and store the surviving set into env->sps; also updates
 * slb_nr and the 1T-segment MMU flag. */
339 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
341 static struct kvm_ppc_smmu_info smmu_info;
342 static bool has_smmu_info;
343 CPUPPCState *env = &cpu->env;
347 /* We only handle page sizes for 64-bit server guests for now */
348 if (!(env->mmu_model & POWERPC_MMU_64)) {
352 /* Collect MMU info from kernel if not already */
/* Cached in function-local statics: queried from KVM only once. */
353 if (!has_smmu_info) {
354 kvm_get_smmu_info(cpu, &smmu_info);
355 has_smmu_info = true;
358 rampagesize = getrampagesize();
360 /* Convert to QEMU form */
361 memset(&env->sps, 0, sizeof(env->sps));
/* ik/jk index the kernel arrays, iq/jq the (compacted) QEMU arrays. */
363 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
364 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
365 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
367 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 qsps->page_shift = ksps->page_shift;
372 qsps->slb_enc = ksps->slb_enc;
373 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
374 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
375 ksps->enc[jk].page_shift)) {
378 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
379 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
380 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
384 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
388 env->slb_nr = smmu_info.slb_size;
389 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
390 env->mmu_model |= POWERPC_MMU_1TSEG;
392 env->mmu_model &= ~POWERPC_MMU_1TSEG;
395 #else /* defined (TARGET_PPC64) */
/* No-op on 32-bit targets: page-size fixup only applies to 64-bit
 * server MMUs. */
397 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
401 #endif /* !defined (TARGET_PPC64) */
/* Map a QEMU CPU to the vcpu id KVM should use (device-tree CPU id). */
403 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
405 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
/* Per-vCPU initialisation: fix up page sizes, sync sregs, arm the kick
 * timer, and enable shared-TLB support on BookE 2.06 MMUs. */
408 int kvm_arch_init_vcpu(CPUState *cs)
410 PowerPCCPU *cpu = POWERPC_CPU(cs);
411 CPUPPCState *cenv = &cpu->env;
414 /* Gather server mmu info from KVM and update the CPU state */
415 kvm_fixup_page_sizes(cpu);
417 /* Synchronize sregs with kvm */
418 ret = kvm_arch_sync_sregs(cpu);
423 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
425 /* Some targets support access to KVM's guest TLB. */
426 switch (cenv->mmu_model) {
427 case POWERPC_MMU_BOOKE206:
428 ret = kvm_booke206_tlb_init(cpu);
437 void kvm_arch_reset_vcpu(CPUState *cpu)
/* Flush QEMU's software TLB into KVM by marking every entry dirty via
 * KVM_DIRTY_TLB.  No-op unless the shared-TLB setup succeeded. */
441 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
443 CPUPPCState *env = &cpu->env;
444 CPUState *cs = CPU(cpu);
445 struct kvm_dirty_tlb dirty_tlb;
446 unsigned char *bitmap;
449 if (!env->kvm_sw_tlb) {
/* All-ones bitmap: one bit per TLB entry, rounded up to whole bytes. */
453 bitmap = g_malloc((env->nb_tlb + 7) / 8);
454 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
456 dirty_tlb.bitmap = (uintptr_t)bitmap;
457 dirty_tlb.num_dirty = env->nb_tlb;
459 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
461 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
462 __func__, strerror(-ret));
/* Read one SPR from KVM via KVM_GET_ONE_REG into env->spr[spr], sizing
 * the transfer (u32/u64) from the register id.  Errors are only traced. */
468 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
470 PowerPCCPU *cpu = POWERPC_CPU(cs);
471 CPUPPCState *env = &cpu->env;
476 struct kvm_one_reg reg = {
478 .addr = (uintptr_t) &val,
/* Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
482 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
484 trace_kvm_failed_spr_get(spr, strerror(errno));
486 switch (id & KVM_REG_SIZE_MASK) {
487 case KVM_REG_SIZE_U32:
488 env->spr[spr] = val.u32;
491 case KVM_REG_SIZE_U64:
492 env->spr[spr] = val.u64;
496 /* Don't handle this size yet */
/* Write one SPR from env->spr[spr] to KVM via KVM_SET_ONE_REG, sizing the
 * transfer (u32/u64) from the register id.  Errors are only traced. */
502 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
504 PowerPCCPU *cpu = POWERPC_CPU(cs);
505 CPUPPCState *env = &cpu->env;
510 struct kvm_one_reg reg = {
512 .addr = (uintptr_t) &val,
516 switch (id & KVM_REG_SIZE_MASK) {
517 case KVM_REG_SIZE_U32:
518 val.u32 = env->spr[spr];
521 case KVM_REG_SIZE_U64:
522 val.u64 = env->spr[spr];
526 /* Don't handle this size yet */
/* Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
530 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
532 trace_kvm_failed_spr_set(spr, strerror(errno));
/* Push FP/VSX/Altivec state (FPSCR, FPR/VSR pairs, VSCR, VRs) to KVM via
 * ONE_REG.  Fixed mojibake throughout: "&reg" had been corrupted to "®". */
536 static int kvm_put_fp(CPUState *cs)
538 PowerPCCPU *cpu = POWERPC_CPU(cs);
539 CPUPPCState *env = &cpu->env;
540 struct kvm_one_reg reg;
544 if (env->insns_flags & PPC_FLOAT) {
545 uint64_t fpscr = env->fpscr;
546 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
548 reg.id = KVM_REG_PPC_FPSCR;
549 reg.addr = (uintptr_t)&fpscr;
550 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
552 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
/* Each VSR is the FPR (high half) paired with the VSX extension (low). */
556 for (i = 0; i < 32; i++) {
559 vsr[0] = float64_val(env->fpr[i]);
560 vsr[1] = env->vsr[i];
561 reg.addr = (uintptr_t) &vsr;
562 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
564 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
566 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
573 if (env->insns_flags & PPC_ALTIVEC) {
574 reg.id = KVM_REG_PPC_VSCR;
575 reg.addr = (uintptr_t)&env->vscr;
576 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
582 for (i = 0; i < 32; i++) {
583 reg.id = KVM_REG_PPC_VR(i);
584 reg.addr = (uintptr_t)&env->avr[i];
585 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
587 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
/* Pull FP/VSX/Altivec state from KVM via ONE_REG, mirroring kvm_put_fp().
 * Fixed mojibake throughout: "&reg" had been corrupted to "®". */
596 static int kvm_get_fp(CPUState *cs)
598 PowerPCCPU *cpu = POWERPC_CPU(cs);
599 CPUPPCState *env = &cpu->env;
600 struct kvm_one_reg reg;
604 if (env->insns_flags & PPC_FLOAT) {
606 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
608 reg.id = KVM_REG_PPC_FPSCR;
609 reg.addr = (uintptr_t)&fpscr;
610 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
612 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
618 for (i = 0; i < 32; i++) {
621 reg.addr = (uintptr_t) &vsr;
622 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
624 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
626 DPRINTF("Unable to get %s%d from KVM: %s\n",
627 vsx ? "VSR" : "FPR", i, strerror(errno));
/* Split the VSR back into FPR (high) and VSX extension (low) halves. */
630 env->fpr[i] = vsr[0];
632 env->vsr[i] = vsr[1];
638 if (env->insns_flags & PPC_ALTIVEC) {
639 reg.id = KVM_REG_PPC_VSCR;
640 reg.addr = (uintptr_t)&env->vscr;
641 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
647 for (i = 0; i < 32; i++) {
648 reg.id = KVM_REG_PPC_VR(i);
649 reg.addr = (uintptr_t)&env->avr[i];
650 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
652 DPRINTF("Unable to get VR%d from KVM: %s\n",
662 #if defined(TARGET_PPC64)
/* Fetch the PAPR VPA, SLB-shadow and dispatch-trace-log registrations
 * from KVM.  The asserts check the env field layout matches the packed
 * addr+size format the kernel expects for the 128-bit regs.
 * Fixed mojibake throughout: "&reg" had been corrupted to "®". */
663 static int kvm_get_vpa(CPUState *cs)
665 PowerPCCPU *cpu = POWERPC_CPU(cs);
666 CPUPPCState *env = &cpu->env;
667 struct kvm_one_reg reg;
670 reg.id = KVM_REG_PPC_VPA_ADDR;
671 reg.addr = (uintptr_t)&env->vpa_addr;
672 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
674 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
678 assert((uintptr_t)&env->slb_shadow_size
679 == ((uintptr_t)&env->slb_shadow_addr + 8));
680 reg.id = KVM_REG_PPC_VPA_SLB;
681 reg.addr = (uintptr_t)&env->slb_shadow_addr;
682 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
684 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
689 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
690 reg.id = KVM_REG_PPC_VPA_DTL;
691 reg.addr = (uintptr_t)&env->dtl_addr;
692 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
694 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
/* Push VPA/SLB-shadow/DTL registrations to KVM in dependency order (master
 * VPA first when registering, last when deregistering).
 * Fixed mojibake throughout: "&reg" had been corrupted to "®". */
702 static int kvm_put_vpa(CPUState *cs)
704 PowerPCCPU *cpu = POWERPC_CPU(cs);
705 CPUPPCState *env = &cpu->env;
706 struct kvm_one_reg reg;
709 /* SLB shadow or DTL can't be registered unless a master VPA is
710 * registered. That means when restoring state, if a VPA *is*
711 * registered, we need to set that up first. If not, we need to
712 * deregister the others before deregistering the master VPA */
713 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
716 reg.id = KVM_REG_PPC_VPA_ADDR;
717 reg.addr = (uintptr_t)&env->vpa_addr;
718 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
720 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
725 assert((uintptr_t)&env->slb_shadow_size
726 == ((uintptr_t)&env->slb_shadow_addr + 8));
727 reg.id = KVM_REG_PPC_VPA_SLB;
728 reg.addr = (uintptr_t)&env->slb_shadow_addr;
729 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
731 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
735 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
736 reg.id = KVM_REG_PPC_VPA_DTL;
737 reg.addr = (uintptr_t)&env->dtl_addr;
738 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
740 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
/* Deregistration path: clear the master VPA only after the others. */
745 if (!env->vpa_addr) {
746 reg.id = KVM_REG_PPC_VPA_ADDR;
747 reg.addr = (uintptr_t)&env->vpa_addr;
748 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
750 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
757 #endif /* TARGET_PPC64 */
/* Write the full QEMU CPU state (GPRs, CR, SPRs, sregs, ONE_REG SPRs,
 * VPA) into KVM.  `level` gates the expensive reset-only state.
 * Fixed mojibake: "&regs" had been corrupted to "®s" (twice). */
759 int kvm_arch_put_registers(CPUState *cs, int level)
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
774 regs.xer = cpu_read_xer(env);
778 regs.srr0 = env->spr[SPR_SRR0];
779 regs.srr1 = env->spr[SPR_SRR1];
781 regs.sprg0 = env->spr[SPR_SPRG0];
782 regs.sprg1 = env->spr[SPR_SPRG1];
783 regs.sprg2 = env->spr[SPR_SPRG2];
784 regs.sprg3 = env->spr[SPR_SPRG3];
785 regs.sprg4 = env->spr[SPR_SPRG4];
786 regs.sprg5 = env->spr[SPR_SPRG5];
787 regs.sprg6 = env->spr[SPR_SPRG6];
788 regs.sprg7 = env->spr[SPR_SPRG7];
790 regs.pid = env->spr[SPR_BOOKE_PID];
792 for (i = 0;i < 32; i++)
793 regs.gpr[i] = env->gpr[i];
/* Pack the 8 CR fields into the single 32-bit CR register, crf[0] high. */
796 for (i = 0; i < 8; i++) {
797 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
800 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
806 if (env->tlb_dirty) {
808 env->tlb_dirty = false;
811 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
812 struct kvm_sregs sregs;
814 sregs.pvr = env->spr[SPR_PVR];
816 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
/* 64-bit: valid SLB entries carry their index in the low esid bits. */
820 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
821 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
822 if (env->slb[i].esid & SLB_ESID_V) {
823 sregs.u.s.ppc64.slb[i].slbe |= i;
825 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
830 for (i = 0; i < 16; i++) {
831 sregs.u.s.ppc32.sr[i] = env->sr[i];
835 for (i = 0; i < 8; i++) {
836 /* Beware. We have to swap upper and lower bits here */
837 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
839 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
843 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
849 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
850 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
856 /* We deliberately ignore errors here, for kernels which have
857 * the ONE_REG calls, but don't support the specific
858 * registers, there's a reasonable chance things will still
859 * work, at least until we try to migrate. */
860 for (i = 0; i < 1024; i++) {
861 uint64_t id = env->spr_cb[i].one_reg_id;
864 kvm_put_one_spr(cs, id, i);
870 if (kvm_put_vpa(cs) < 0) {
871 DPRINTF("Warning: Unable to set VPA information to KVM\n");
874 #endif /* TARGET_PPC64 */
/* Read the full CPU state back from KVM into QEMU's env: regs, then
 * BookE or BookS sregs, then ONE_REG SPRs and (PPC64) the VPA state.
 * Fixed mojibake: "&regs" had been corrupted to "®s". */
880 int kvm_arch_get_registers(CPUState *cs)
882 PowerPCCPU *cpu = POWERPC_CPU(cs);
883 CPUPPCState *env = &cpu->env;
884 struct kvm_regs regs;
885 struct kvm_sregs sregs;
889 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
/* Unpack the 32-bit CR into the 8 CR fields, consuming low nibbles first. */
894 for (i = 7; i >= 0; i--) {
895 env->crf[i] = cr & 15;
901 cpu_write_xer(env, regs.xer);
905 env->spr[SPR_SRR0] = regs.srr0;
906 env->spr[SPR_SRR1] = regs.srr1;
908 env->spr[SPR_SPRG0] = regs.sprg0;
909 env->spr[SPR_SPRG1] = regs.sprg1;
910 env->spr[SPR_SPRG2] = regs.sprg2;
911 env->spr[SPR_SPRG3] = regs.sprg3;
912 env->spr[SPR_SPRG4] = regs.sprg4;
913 env->spr[SPR_SPRG5] = regs.sprg5;
914 env->spr[SPR_SPRG6] = regs.sprg6;
915 env->spr[SPR_SPRG7] = regs.sprg7;
917 env->spr[SPR_BOOKE_PID] = regs.pid;
919 for (i = 0;i < 32; i++)
920 env->gpr[i] = regs.gpr[i];
/* BookE path: decode the feature-flagged sregs union. */
924 if (cap_booke_sregs) {
925 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
930 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
931 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
932 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
933 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
934 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
935 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
936 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
937 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
938 env->spr[SPR_DECR] = sregs.u.e.dec;
939 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
940 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
941 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
944 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
945 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
946 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
947 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
948 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
949 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
952 if (sregs.u.e.features & KVM_SREGS_E_64) {
953 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
956 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
957 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
960 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
961 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
962 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
963 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
964 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
965 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
966 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
967 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
968 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
969 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
970 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
971 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
972 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
973 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
974 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
975 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
976 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
978 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
979 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
980 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
981 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
984 if (sregs.u.e.features & KVM_SREGS_E_PM) {
985 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
988 if (sregs.u.e.features & KVM_SREGS_E_PC) {
989 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
990 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
994 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
995 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
996 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
997 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
998 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
999 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1000 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1001 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1002 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1003 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1004 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1007 if (sregs.u.e.features & KVM_SREGS_EXP) {
1008 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1011 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1012 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1013 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1016 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1017 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1018 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1019 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1021 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1022 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1023 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
/* BookS path (cap_segstate; branch header elided in this view). */
1029 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1034 if (!env->external_htab) {
1035 ppc_store_sdr1(env, sregs.u.s.sdr1);
1041 * The packed SLB array we get from KVM_GET_SREGS only contains
1042 * information about valid entries. So we flush our internal
1043 * copy to get rid of stale ones, then put all valid SLB entries
1046 memset(env->slb, 0, sizeof(env->slb));
1047 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1048 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1049 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1051 * Only restore valid entries
1053 if (rb & SLB_ESID_V) {
1054 ppc_store_slb(env, rb, rs);
1060 for (i = 0; i < 16; i++) {
1061 env->sr[i] = sregs.u.s.ppc32.sr[i];
/* BATs: KVM packs upper/lower halves into one 64-bit value each. */
1065 for (i = 0; i < 8; i++) {
1066 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1067 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1068 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1069 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1074 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1080 /* We deliberately ignore errors here, for kernels which have
1081 * the ONE_REG calls, but don't support the specific
1082 * registers, there's a reasonable chance things will still
1083 * work, at least until we try to migrate. */
1084 for (i = 0; i < 1024; i++) {
1085 uint64_t id = env->spr_cb[i].one_reg_id;
1088 kvm_get_one_spr(cs, id, i);
1094 if (kvm_get_vpa(cs) < 0) {
1095 DPRINTF("Warning: Unable to get VPA information from KVM\n");
/* Raise or lower the external interrupt line on a vCPU via KVM_INTERRUPT.
 * Only PPC_INTERRUPT_EXT is handled, and only when the kernel supports
 * both the level and unset irq capabilities. */
1104 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1106 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1108 if (irq != PPC_INTERRUPT_EXT) {
1112 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1116 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
/* Select which core input pin carries the external interrupt for the
 * legacy (non-level-irq) injection path in kvm_arch_pre_run(). */
1121 #if defined(TARGET_PPCEMB)
1122 #define PPC_INPUT_INT PPC40x_INPUT_INT
1123 #elif defined(TARGET_PPC64)
1124 #define PPC_INPUT_INT PPC970_INPUT_INT
1126 #define PPC_INPUT_INT PPC6xx_INPUT_INT
/* Pre-run hook: on kernels without level-irq support, manually inject a
 * pending external interrupt and arm the 20 ms re-kick timer. */
1129 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1131 PowerPCCPU *cpu = POWERPC_CPU(cs);
1132 CPUPPCState *env = &cpu->env;
1136 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1137 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1138 if (!cap_interrupt_level &&
1139 run->ready_for_interrupt_injection &&
1140 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1141 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1143 /* For now KVM disregards the 'irq' argument. However, in the
1144 * future KVM could cache it in-kernel to avoid a heavyweight exit
1145 * when reading the UIC.
1147 irq = KVM_INTERRUPT_SET;
1149 DPRINTF("injected interrupt %d\n", irq);
1150 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1152 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1155 /* Always wake up soon in case the interrupt was level based */
1156 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1157 (get_ticks_per_sec() / 50));
1160 /* We don't know if there are more interrupts pending after this. However,
1161 * the guest will return to userspace in the course of handling this one
1162 * anyways, so we will get a chance to deliver the rest. */
1165 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1169 int kvm_arch_process_async_events(CPUState *cs)
/* Handle a KVM halt exit: if no interrupt is pending and external
 * interrupts are enabled (MSR[EE]), put the vCPU into the halted state. */
1174 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1176 CPUState *cs = CPU(cpu);
1177 CPUPPCState *env = &cpu->env;
1179 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1181 cs->exception_index = EXCP_HLT;
1187 /* map dcr access to existing qemu dcr emulation */
/* Forward a guest DCR read to QEMU's DCR emulation; unhandled DCR numbers
 * are only logged, not fatal. */
1188 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1190 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1191 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
/* Forward a guest DCR write to QEMU's DCR emulation; unhandled DCR numbers
 * are only logged, not fatal. */
1196 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1198 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1199 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
/* Dispatch a KVM exit: DCR access, halt, PAPR hypercall (PPC64), EPR
 * (MPIC interrupt acknowledge), watchdog expiry, or unknown. */
1204 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1206 PowerPCCPU *cpu = POWERPC_CPU(cs);
1207 CPUPPCState *env = &cpu->env;
1210 switch (run->exit_reason) {
1212 if (run->dcr.is_write) {
1213 DPRINTF("handle dcr write\n");
1214 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1216 DPRINTF("handle dcr read\n");
1217 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1221 DPRINTF("handle halt\n");
1222 ret = kvmppc_handle_halt(cpu);
1224 #if defined(TARGET_PPC64)
1225 case KVM_EXIT_PAPR_HCALL:
1226 DPRINTF("handle PAPR hypercall\n");
1227 run->papr_hcall.ret = spapr_hypercall(cpu,
1229 run->papr_hcall.args);
/* KVM_EXIT_EPR: answer the interrupt-acknowledge cycle from the MPIC. */
1234 DPRINTF("handle epr\n");
1235 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1238 case KVM_EXIT_WATCHDOG:
1239 DPRINTF("handle watchdog expiry\n");
1240 watchdog_perform_action();
1245 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
/* OR the given bits into the guest TSR via the KVM_REG_PPC_OR_TSR ONE_REG.
 * Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
1253 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1255 CPUState *cs = CPU(cpu);
1256 uint32_t bits = tsr_bits;
1257 struct kvm_one_reg reg = {
1258 .id = KVM_REG_PPC_OR_TSR,
1259 .addr = (uintptr_t) &bits,
1262 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
/* Clear the given bits in the guest TSR via KVM_REG_PPC_CLEAR_TSR.
 * Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
1265 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1268 CPUState *cs = CPU(cpu);
1269 uint32_t bits = tsr_bits;
1270 struct kvm_one_reg reg = {
1271 .id = KVM_REG_PPC_CLEAR_TSR,
1272 .addr = (uintptr_t) &bits,
1275 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
/* Push the guest's BookE TCR to KVM via KVM_REG_PPC_TCR.
 * Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
1278 int kvmppc_set_tcr(PowerPCCPU *cpu)
1280 CPUState *cs = CPU(cpu);
1281 CPUPPCState *env = &cpu->env;
1282 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1284 struct kvm_one_reg reg = {
1285 .id = KVM_REG_PPC_TCR,
1286 .addr = (uintptr_t) &tcr,
1289 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
/* Enable the in-kernel BookE watchdog for this vCPU via KVM_ENABLE_CAP. */
1292 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1294 CPUState *cs = CPU(cpu);
1295 struct kvm_enable_cap encap = {};
1298 if (!kvm_enabled()) {
1302 if (!cap_ppc_watchdog) {
/* NOTE(review): message lacks a trailing '\n' -- confirm intended. */
1303 printf("warning: KVM does not support watchdog");
1307 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1308 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1310 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1311 __func__, strerror(-ret));
/* Scan /proc/cpuinfo for a line starting with `field` and copy it (the
 * whole line) into `value` (at most `len` bytes).  Return value semantics
 * elided in this view -- presumably non-zero on success. */
1318 static int read_cpuinfo(const char *field, char *value, int len)
1322 int field_len = strlen(field);
1325 f = fopen("/proc/cpuinfo", "r");
1331 if(!fgets(line, sizeof(line), f)) {
1334 if (!strncmp(line, field, field_len)) {
1335 pstrcpy(value, len, line);
/* Host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo; falls back to get_ticks_per_sec() if unavailable. */
1346 uint32_t kvmppc_get_tbfreq(void)
1350 uint32_t retval = get_ticks_per_sec();
1352 if (read_cpuinfo("timebase", line, sizeof(line))) {
/* Value follows the ':' separator on the cpuinfo line. */
1356 if (!(ns = strchr(line, ':'))) {
1366 /* Try to find a device tree node for a CPU with clock-frequency property */
/* On success `buf` holds the path of the first CPU node that has a
 * clock-frequency property; failure leaves buf empty and is reported. */
1367 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1369 struct dirent *dirp;
1372 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1373 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1378 while ((dirp = readdir(dp)) != NULL) {
/* Probe <node>/clock-frequency; if present, keep the node path in buf. */
1380 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1382 f = fopen(buf, "r");
1384 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1391 if (buf[0] == '\0') {
1392 printf("Unknown host!\n");
1399 /* Read a CPU node property from the host device tree that's a single
1400 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1401 * (can't find or open the property, or doesn't understand the
1403 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1413 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
/* Fixed off-by-one: strncat's size argument is the max number of
 * characters appended EXCLUDING the terminating NUL, so the available
 * space is sizeof(buf) - strlen(buf) - 1, not sizeof(buf) - strlen(buf)
 * (the original could write one byte past the buffer). */
1417 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1418 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1420 f = fopen(buf, "rb");
1425 len = fread(&u, 1, sizeof(u), f);
/* Device-tree properties are big-endian; width decided by bytes read. */
1429 /* property is a 32-bit quantity */
1430 return be32_to_cpu(u.v32);
1432 return be64_to_cpu(u.v64);
/* Host CPU clock frequency from the device tree (0 on failure). */
1438 uint64_t kvmppc_get_clockfreq(void)
1440 return kvmppc_read_int_cpu_dt("clock-frequency");
/* Host "ibm,vmx" (Altivec/VSX level) property from the device tree. */
1443 uint32_t kvmppc_get_vmx(void)
1445 return kvmppc_read_int_cpu_dt("ibm,vmx");
/* Host "ibm,dfp" (decimal FP support) property from the device tree. */
1448 uint32_t kvmppc_get_dfp(void)
1450 return kvmppc_read_int_cpu_dt("ibm,dfp");
/* Fetch the paravirt info (hypercall instructions, idle flag) from KVM;
 * requires KVM_CAP_PPC_GET_PVINFO.  Return value elided in this view. */
1453 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1455 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1456 CPUState *cs = CPU(cpu);
1458 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1459 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
/* Whether the host KVM advertises the paravirt EV_IDLE hypercall. */
1466 int kvmppc_get_hasidle(CPUPPCState *env)
1468 struct kvm_ppc_pvinfo pvinfo;
1470 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1471 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
/* Copy the KVM hypercall instruction sequence into buf (buf_len bytes).
 * Falls back to an "always fail" sequence when pvinfo is unavailable. */
1478 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1480 uint32_t *hc = (uint32_t*)buf;
1481 struct kvm_ppc_pvinfo pvinfo;
1483 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
/* NOTE(review): copies buf_len bytes unconditionally -- assumes
 * callers never pass a length larger than sizeof(pvinfo.hcall);
 * confirm against the callers. */
1484 memcpy(buf, pvinfo.hcall, buf_len);
1489 * Fallback to always fail hypercalls:
/* Switch the vcpu into PAPR (sPAPR pseries guest) mode via
 * KVM_ENABLE_CAP; fatal when the kernel lacks KVM_CAP_PPC_PAPR. */
1505 void kvmppc_set_papr(PowerPCCPU *cpu)
1507 CPUState *cs = CPU(cpu);
1508 struct kvm_enable_cap cap = {};
1511 cap.cap = KVM_CAP_PPC_PAPR;
1512 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
/* PAPR mode is mandatory for pseries guests -- abort rather than limp on */
1515 cpu_abort(cs, "This KVM version does not support PAPR\n");
1518 /* Update the capability flag so we sync the right information
/* Enable or disable the in-kernel MPIC EPR (external interrupt proxy)
 * facility for this vcpu.  Only fatal when *enabling* fails: asking an
 * old kernel to disable a facility it never had is harmless. */
1523 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1525 CPUState *cs = CPU(cpu);
1526 struct kvm_enable_cap cap = {};
1529 cap.cap = KVM_CAP_PPC_EPR;
1530 cap.args[0] = mpic_proxy;
1531 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1533 if (ret && mpic_proxy) {
1534 cpu_abort(cs, "This KVM version does not support EPR\n");
/* Hardware threads per core as reported by KVM (cap_ppc_smt); defaults
 * to 1 when the capability is absent. */
1538 int kvmppc_smt_threads(void)
1540 return cap_ppc_smt ? cap_ppc_smt : 1;
/* Allocate a contiguous Real Mode Area through KVM_ALLOCATE_RMA, mmap
 * it into QEMU and register it as RAM at guest physical address 0 in
 * sysmem.  Returns 0 when contiguous allocation is unnecessary
 * (cap_ppc_rma < 2); other return paths are elided from this view --
 * presumably the size mapped on success. */
1544 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1549 struct kvm_allocate_rma ret;
1550 MemoryRegion *rma_region;
1552 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1553  * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1554  * not necessary on this hardware
1555  * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1557  * FIXME: We should allow the user to force contiguous RMA
1558  * allocation in the cap_ppc_rma==1 case.
1560 if (cap_ppc_rma < 2) {
/* The ioctl yields an fd that the RMA memory is mmap()ed through */
1564 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1566 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
/* Cap the mapping at 256 MiB even if KVM offered a larger RMA */
1571 size = MIN(ret.rma_size, 256ul << 20);
1573 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1574 if (rma == MAP_FAILED) {
1575 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
/* Wrap the mapping in a RAM MemoryRegion and place it at address 0 */
1579 rma_region = g_new(MemoryRegion, 1);
1580 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1581 vmstate_register_ram_global(rma_region);
1582 memory_region_add_subregion(sysmem, 0, rma_region);
/* Clamp current_size to the largest RMA usable given the host MMU's
 * supported page sizes, the backing page size of guest RAM, and the
 * hash table size (hash_shift).  With cap_ppc_rma >= 2 the kernel
 * manages this itself, so the requested size passes through. */
1587 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1589 struct kvm_ppc_smmu_info info;
1590 long rampagesize, best_page_shift;
1593 if (cap_ppc_rma >= 2) {
1594 return current_size;
1597 /* Find the largest hardware supported page size that's less than
1598  * or equal to the (logical) backing page size of guest RAM */
1599 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1600 rampagesize = getrampagesize();
1601 best_page_shift = 0;
1603 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1604 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
/* Unused table slots have page_shift == 0 */
1606 if (!sps->page_shift) {
1610 if ((sps->page_shift > best_page_shift)
1611 && ((1UL << sps->page_shift) <= rampagesize)) {
1612 best_page_shift = sps->page_shift;
/* Limit: 2^(page_shift + hash_shift - 7).  NOTE(review): formula
 * taken verbatim from the expression below -- confirm the
 * architectural derivation against the ISA / sPAPR documents. */
1616 return MIN(current_size,
1617 1ULL << (best_page_shift + hash_shift - 7));
/* Create an in-kernel TCE (DMA translation) table for liobn and mmap
 * it into QEMU.  On success the mapped table is returned and *pfd
 * holds the backing fd; *pfd stays -1 when no kernel table exists so
 * the teardown path knows not to munmap. */
1621 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1623 struct kvm_create_spapr_tce args = {
1625 .window_size = window_size,
1631 /* Must set fd to -1 so we don't try to munmap when called for
1632  * destroying the table, which the upper layers -will- do
1635 if (!cap_spapr_tce) {
1639 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1641 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
/* One 64-bit TCE entry per IOMMU page in the DMA window */
1646 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1647 /* FIXME: round this up to page size */
1649 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1650 if (table == MAP_FAILED) {
1651 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
/* Tear down a TCE table created by kvmppc_create_spapr_tce(): unmap
 * the table and (per the condition elided at 1670/1671, presumably)
 * close the fd.  On failure the table is deliberately leaked rather
 * than left half-destroyed. */
1661 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
/* Recompute the mapping length the same way the create path did */
1669 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1670 if ((munmap(table, len) < 0) ||
1672 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1674 /* Leak the table */
/* Ask the kernel to allocate/reset the guest hash page table with the
 * suggested size (shift_hint).  Returns 0 when QEMU must allocate the
 * htab itself (full emulation, or PR KVM); otherwise the shift the
 * kernel actually used. */
1680 int kvmppc_reset_htab(int shift_hint)
1682 uint32_t shift = shift_hint;
1684 if (!kvm_enabled()) {
1685 /* Full emulation, tell caller to allocate htab itself */
1688 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
/* The ioctl updates 'shift' in place to the size it granted */
1690 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1691 if (ret == -ENOTTY) {
1692 /* At least some versions of PR KVM advertise the
1693  * capability, but don't implement the ioctl(). Oops.
1694  * Return 0 so that we allocate the htab in qemu, as is
1695  * correct for PR. */
1697 } else if (ret < 0) {
1703 /* We have a kernel that predates the htab reset calls. For PR
1704  * KVM, we need to allocate the htab ourselves, for an HV KVM of
1705  * this era, it has allocated a 16MB fixed size hash table
1706  * already. Kernels of this era have the GET_PVINFO capability
1707  * only on PR, so we use this hack to determine the right
1709 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1710 /* PR - tell caller to allocate htab */
1713 /* HV - assume 16MB kernel allocated htab */
/* Read the host's Processor Version Register (body elided here). */
1718 static inline uint32_t mfpvr(void)
/* Set or clear the given flag bits in *word according to 'on'
 * (body elided here). */
1727 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
/* Instance init for the "host" CPU QOM type -- this type only exists
 * when running under KVM, hence the assertion. */
1736 static void kvmppc_host_cpu_initfn(Object *obj)
1738 assert(kvm_enabled());
/* Class init for the "host" CPU type: patch the CPU class with what
 * the host actually supports -- Altivec/VSX, DFP and L1 cache sizes --
 * as read from the host device tree. */
1742 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1743 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1744 uint32_t vmx = kvmppc_get_vmx();
1745 uint32_t dfp = kvmppc_get_dfp();
1746 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1747 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1749 /* Now fix up the class with information we can query from the host */
1753 /* Only override when we know what the host supports */
/* ibm,vmx level: > 0 means Altivec, > 1 additionally means VSX */
1754 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1755 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1758 /* Only override when we know what the host supports */
1759 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
/* NOTE(review): kvmppc_read_int_cpu_dt is documented above to return
 * 0 (not -1) on failure, so these != -1 guards accept a failed read
 * and would set a cache size of 0 -- confirm and reconcile (either
 * check != 0 here or make the helper return -1 on error). */
1762 if (dcache_size != -1) {
1763 pcc->l1_dcache_size = dcache_size;
1766 if (icache_size != -1) {
1767 pcc->l1_icache_size = icache_size;
/* True when KVM supports the EPR (MPIC external proxy) capability
 * (body elided here -- presumably returns cap_epr). */
1771 bool kvmppc_has_cap_epr(void)
/* True when KVM can expose the hash table via a file descriptor
 * (body elided here -- presumably returns cap_htab_fd). */
1776 bool kvmppc_has_cap_htab_fd(void)
/* Register the "host" CPU QOM type, parented on the CPU class whose
 * PVR matches the host processor -- exact match first, then a masked
 * lookup for unknown revisions. */
1781 static int kvm_ppc_register_host_cpu_type(void)
1783 TypeInfo type_info = {
1784 .name = TYPE_HOST_POWERPC_CPU,
1785 .instance_init = kvmppc_host_cpu_initfn,
1786 .class_init = kvmppc_host_cpu_class_init,
1788 uint32_t host_pvr = mfpvr();
1789 PowerPCCPUClass *pvr_pcc;
1791 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1792 if (pvr_pcc == NULL) {
/* No exact PVR match: fall back to a masked (family) lookup */
1793 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1795 if (pvr_pcc == NULL) {
1798 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1799 type_register(&type_info);
/* Bind an RTAS token to a function name that KVM handles in-kernel.
 * Bails out (return value elided at 1809/1810) when the kernel lacks
 * KVM_CAP_PPC_RTAS. */
1803 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1805 struct kvm_rtas_token_args args = {
1809 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
/* NOTE(review): strncpy leaves args.name unterminated when 'function'
 * is exactly sizeof(args.name) bytes -- presumably RTAS names are
 * always shorter; confirm, or bound the copy at size - 1. */
1813 strncpy(args.name, function, sizeof(args.name));
1815 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
/* Obtain a file descriptor for streaming the guest hash table:
 * read-only for migration save, write for migration load. */
1818 int kvmppc_get_htab_fd(bool write)
1820 struct kvm_get_htab_fd s = {
1821 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1826 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
/* On success the ioctl's return value is the fd itself */
1830 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
/* Stream hash table data from the kernel htab fd into the migration
 * stream, stopping after roughly max_ns of wall-clock time or at EOF.
 * Returns 1 when the whole table has been read (rc == 0), 0 when more
 * remains; error paths are elided from this view. */
1833 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1835 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
/* VLA sized by the caller -- assumed modest (a migration buffer);
 * a huge bufsize would risk stack overflow.  TODO confirm callers. */
1836 uint8_t buf[bufsize];
1840 rc = read(fd, buf, bufsize);
1842 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1846 /* Kernel already returns data in BE format for the file */
1847 qemu_put_buffer(f, buf, rc);
/* Loop continues while data remains and the time budget is unspent */
1851 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1853 return (rc == 0) ? 1 : 0;
/* Write one hash-table chunk (header plus n_valid HPTEs, invalidating
 * n_invalid entries) from the migration stream into the kernel via the
 * htab fd.  Success path returns 0; failure returns are elided here. */
1856 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1857 uint16_t n_valid, uint16_t n_invalid)
1859 struct kvm_get_htab_header *buf;
1860 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
/* Chunk assembled on the stack; size is bounded by the uint16_t
 * n_valid, but alloca of a stream-derived size is worth auditing. */
1863 buf = alloca(chunksize);
1864 /* This is KVM on ppc, so this is all big-endian */
1866 buf->n_valid = n_valid;
1867 buf->n_invalid = n_invalid;
/* HPTE payload follows the header, copied straight from the stream */
1869 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
/* write() carries warn_unused_result -- rc must be checked */
1871 rc = write(fd, buf, chunksize);
1873 fprintf(stderr, "Error writing KVM hash table: %s\n",
1877 if (rc != chunksize) {
1878 /* We should never get a short write on a single chunk */
1879 fprintf(stderr, "Short write, restoring KVM hash table\n");
/* Generic KVM arch hooks for PPC.  Bodies are elided from this view --
 * the breakpoint/debug hooks below are presumably unimplemented stubs
 * on this target; confirm against the full file. */
1885 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1890 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1895 int kvm_arch_on_sigbus(int code, void *addr)
1900 void kvm_arch_init_irq_routing(KVMState *s)
/* Guest-debug hooks: SW/HW breakpoint insert/remove and debug state */
1904 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1909 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1914 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1919 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1924 void kvm_arch_remove_all_hw_breakpoints(void)
1928 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
/* Read buffer for one HPTE group fetched through the htab fd: the KVM
 * chunk header followed by the raw PTE words. */
1932 struct kvm_get_htab_buf {
1933 struct kvm_get_htab_header header;
1935 * We require one extra byte for read
1937 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
/* Read the HPTE group containing pte_index from the kernel and return
 * it as an opaque token -- the address of the hpte payload inside a
 * heap-allocated kvm_get_htab_buf.  The caller must release the token
 * with kvmppc_hash64_free_pteg(). */
1940 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1943 struct kvm_get_htab_fd ghf;
1944 struct kvm_get_htab_buf *hpte_buf;
/* Position the htab fd at the requested PTE group */
1947 ghf.start_index = pte_index;
1948 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1953 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1955 * Read the hpte group
1957 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
/* Token points at the PTE payload, not at the whole buffer */
1962 return (uint64_t)(uintptr_t) hpte_buf->hpte;
/* Release a token from kvmppc_hash64_read_pteg(): recover the
 * enclosing kvm_get_htab_buf from the hpte pointer via container_of
 * and free it. */
1971 void kvmppc_hash64_free_pteg(uint64_t token)
1973 struct kvm_get_htab_buf *htab_buf;
1975 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1981 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1982 target_ulong pte0, target_ulong pte1)
1985 struct kvm_get_htab_fd ghf;
1986 struct kvm_get_htab_buf hpte_buf;
1989 ghf.start_index = 0; /* Ignored */
1990 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1995 hpte_buf.header.n_valid = 1;
1996 hpte_buf.header.n_invalid = 0;
1997 hpte_buf.header.index = pte_index;
1998 hpte_buf.hpte[0] = pte0;
1999 hpte_buf.hpte[1] = pte1;
2001 * Write the hpte entry.
2002 * CAUTION: write() has the warn_unused_result attribute. Hence we
2003 * need to check the return value, even though we do nothing.
2005 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {