2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
45 #include "qemu/cutils.h"
50 #define DPRINTF(fmt, ...) \
51 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
53 #define DPRINTF(fmt, ...) \
57 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
59 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
63 static int cap_interrupt_unset = false;
64 static int cap_interrupt_level = false;
65 static int cap_segstate;
66 static int cap_booke_sregs;
67 static int cap_ppc_smt;
68 static int cap_ppc_rma;
69 static int cap_spapr_tce;
70 static int cap_spapr_multitce;
71 static int cap_spapr_vfio;
73 static int cap_one_reg;
75 static int cap_ppc_watchdog;
77 static int cap_htab_fd;
78 static int cap_fixup_hcalls;
80 static uint32_t debug_inst_opcode;
82 /* XXX We have a race condition where we actually have a level triggered
83 * interrupt, but the infrastructure can't expose that yet, so the guest
84 * takes the interrupt but ignores it, goes to sleep and never gets notified that there's
85 * still an interrupt pending.
87 * As a quick workaround, let's just wake up again 20 ms after we injected
88 * an interrupt. That way we can ensure that we're always reinjecting
89 * interrupts in case the guest swallowed them.
91 static QEMUTimer *idle_timer;
93 static void kvm_kick_cpu(void *opaque)
95 PowerPCCPU *cpu = opaque;
97 qemu_cpu_kick(CPU(cpu));
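/*
 * Illustrative sketch of the workaround described above (the real call
 * site is in kvm_arch_pre_run() further down): after injecting an
 * interrupt we arm idle_timer for roughly 20 ms,
 *
 *     timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 *               (NANOSECONDS_PER_SECOND / 50));
 *
 * so kvm_kick_cpu() runs shortly afterwards and pokes the vCPU, giving a
 * swallowed level-triggered interrupt another chance to be noticed.
 */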
100 static int kvm_ppc_register_host_cpu_type(void);
102 int kvm_arch_init(MachineState *ms, KVMState *s)
104 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
105 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
106 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
107 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
108 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
109 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
110 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
111 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
112 cap_spapr_vfio = false;
113 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
114 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
115 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
116 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
117 /* Note: we don't set cap_papr here, because this capability is
118 * only activated after this by kvmppc_set_papr() */
119 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
120 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
122 if (!cap_interrupt_level) {
123 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
124 "VM to stall at times!\n");
127 kvm_ppc_register_host_cpu_type();
132 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
134 CPUPPCState *cenv = &cpu->env;
135 CPUState *cs = CPU(cpu);
136 struct kvm_sregs sregs;
139 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
140 /* What we're really trying to say is "if we're on BookE, we use
141 the native PVR for now". This is the only sane way to check
142 it though, so we potentially give users the impression that they can run
143 BookE guests on BookS. Let's hope nobody dares to try :) */
147 fprintf(stderr, "kvm error: missing PVR setting capability\n");
152 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
157 sregs.pvr = cenv->spr[SPR_PVR];
158 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
161 /* Set up a shared TLB array with KVM */
162 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
164 CPUPPCState *env = &cpu->env;
165 CPUState *cs = CPU(cpu);
166 struct kvm_book3e_206_tlb_params params = {};
167 struct kvm_config_tlb cfg = {};
168 unsigned int entries = 0;
171 if (!kvm_enabled() ||
172 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
176 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
178 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
179 params.tlb_sizes[i] = booke206_tlb_size(env, i);
180 params.tlb_ways[i] = booke206_tlb_ways(env, i);
181 entries += params.tlb_sizes[i];
184 assert(entries == env->nb_tlb);
185 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
187 env->tlb_dirty = true;
189 cfg.array = (uintptr_t)env->tlb.tlbm;
190 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
191 cfg.params = (uintptr_t)&params;
192 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
194 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
196 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
197 __func__, strerror(-ret));
201 env->kvm_sw_tlb = true;
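/*
 * Note: KVM_CAP_SW_TLB makes the kernel and QEMU share env->tlb.tlbm
 * directly. QEMU only needs to tell KVM when its copy has changed, which
 * kvm_sw_tlb_put() below does by flagging every entry dirty through the
 * KVM_DIRTY_TLB ioctl.
 */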
206 #if defined(TARGET_PPC64)
207 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
208 struct kvm_ppc_smmu_info *info)
210 CPUPPCState *env = &cpu->env;
211 CPUState *cs = CPU(cpu);
213 memset(info, 0, sizeof(*info));
215 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
216 * we need to "guess" what the supported page sizes are.
218 * For that to work we make a few assumptions:
220 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
221 * KVM which only supports 4K and 16M pages, but supports them
222 * regardless of the backing store characteristics. We also don't
223 * support 1T segments.
225 * This is safe as if HV KVM ever supports that capability or PR
226 * KVM grows support for more page/segment sizes, those versions
227 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
228 * will not hit this fallback
230 * - Else we are running HV KVM. This means we only support page
231 * sizes that fit in the backing store. Additionally we only
232 * advertise 64K pages if the processor is ARCH 2.06 and we assume
233 * P7 encodings for the SLB and hash table. Here too, we assume
234 * support for any newer processor will mean a kernel that
235 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
238 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
243 /* Standard 4k base page size segment */
244 info->sps[0].page_shift = 12;
245 info->sps[0].slb_enc = 0;
246 info->sps[0].enc[0].page_shift = 12;
247 info->sps[0].enc[0].pte_enc = 0;
249 /* Standard 16M large page size segment */
250 info->sps[1].page_shift = 24;
251 info->sps[1].slb_enc = SLB_VSID_L;
252 info->sps[1].enc[0].page_shift = 24;
253 info->sps[1].enc[0].pte_enc = 0;
257 /* HV KVM has backing store size restrictions */
258 info->flags = KVM_PPC_PAGE_SIZES_REAL;
260 if (env->mmu_model & POWERPC_MMU_1TSEG) {
261 info->flags |= KVM_PPC_1T_SEGMENTS;
264 if (env->mmu_model == POWERPC_MMU_2_06 ||
265 env->mmu_model == POWERPC_MMU_2_07) {
271 /* Standard 4k base page size segment */
272 info->sps[i].page_shift = 12;
273 info->sps[i].slb_enc = 0;
274 info->sps[i].enc[0].page_shift = 12;
275 info->sps[i].enc[0].pte_enc = 0;
278 /* 64K on MMU 2.06 and later */
279 if (env->mmu_model == POWERPC_MMU_2_06 ||
280 env->mmu_model == POWERPC_MMU_2_07) {
281 info->sps[i].page_shift = 16;
282 info->sps[i].slb_enc = 0x110;
283 info->sps[i].enc[0].page_shift = 16;
284 info->sps[i].enc[0].pte_enc = 1;
288 /* Standard 16M large page size segment */
289 info->sps[i].page_shift = 24;
290 info->sps[i].slb_enc = SLB_VSID_L;
291 info->sps[i].enc[0].page_shift = 24;
292 info->sps[i].enc[0].pte_enc = 0;
296 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
298 CPUState *cs = CPU(cpu);
301 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
302 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
308 kvm_get_fallback_smmu_info(cpu, info);
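/*
 * Minimal usage sketch (both real callers live in this file:
 * kvm_fixup_page_sizes() trims env->sps with the result, and
 * kvmppc_rma_size() uses it to bound the RMA):
 *
 *     struct kvm_ppc_smmu_info info;
 *     kvm_get_smmu_info(cpu, &info);
 *     if (!(info.flags & KVM_PPC_1T_SEGMENTS)) {
 *         ... 1T segments are not available on this host ...
 *     }
 */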
311 static long gethugepagesize(const char *mem_path)
317 ret = statfs(mem_path, &fs);
318 } while (ret != 0 && errno == EINTR);
321 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
326 #define HUGETLBFS_MAGIC 0x958458f6
328 if (fs.f_type != HUGETLBFS_MAGIC) {
329 /* Explicit mempath, but it's ordinary pages */
330 return getpagesize();
333 /* It's a hugepage, return the huge page size */
337 static int find_max_supported_pagesize(Object *obj, void *opaque)
340 long *hpsize_min = opaque;
342 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
343 mem_path = object_property_get_str(obj, "mem-path", NULL);
345 long hpsize = gethugepagesize(mem_path);
346 if (hpsize < *hpsize_min) {
347 *hpsize_min = hpsize;
350 *hpsize_min = getpagesize();
357 static long getrampagesize(void)
359 long hpsize = LONG_MAX;
363 return gethugepagesize(mem_path);
366 /* it's possible we have memory-backend objects with
367 * hugepage-backed RAM. these may get mapped into system
368 * address space via -numa parameters or memory hotplug
369 * hooks. we want to take these into account, but we
370 * also want to make sure these supported hugepage
371 * sizes are applicable across the entire range of memory
372 * we may boot from, so we take the min across all
373 * backends, and assume normal pages in cases where a
374 * backend isn't backed by hugepages.
376 memdev_root = object_resolve_path("/objects", NULL);
378 return getpagesize();
381 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
383 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
386 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
388 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
392 return (1ul << shift) <= rampgsize;
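/*
 * Worked example: under HV KVM, flags has KVM_PPC_PAGE_SIZES_REAL set, so
 * a 16M page size (shift == 24) is only accepted when guest RAM is backed
 * by pages of at least 16M; with 4K or 64K backing pages,
 * (1ul << 24) > rampgsize and the size is rejected. Without
 * KVM_PPC_PAGE_SIZES_REAL (the PR KVM case), every size is accepted
 * regardless of the backing store.
 */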
395 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
397 static struct kvm_ppc_smmu_info smmu_info;
398 static bool has_smmu_info;
399 CPUPPCState *env = &cpu->env;
403 /* We only handle page sizes for 64-bit server guests for now */
404 if (!(env->mmu_model & POWERPC_MMU_64)) {
408 /* Collect MMU info from kernel if not already */
409 if (!has_smmu_info) {
410 kvm_get_smmu_info(cpu, &smmu_info);
411 has_smmu_info = true;
414 rampagesize = getrampagesize();
416 /* Convert to QEMU form */
417 memset(&env->sps, 0, sizeof(env->sps));
419 /* If we have HV KVM, we need to forbid CI large pages if our
420 * host page size is smaller than 64K.
422 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
423 env->ci_large_pages = getpagesize() >= 0x10000;
427 * XXX This loop should be an entry wide AND of the capabilities that
428 * the selected CPU has with the capabilities that KVM supports.
430 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
431 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
432 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
434 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
438 qsps->page_shift = ksps->page_shift;
439 qsps->slb_enc = ksps->slb_enc;
440 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
441 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
442 ksps->enc[jk].page_shift)) {
445 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
446 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
447 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
451 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
455 env->slb_nr = smmu_info.slb_size;
456 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
457 env->mmu_model &= ~POWERPC_MMU_1TSEG;
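/*
 * Net effect: after kvm_fixup_page_sizes() runs, env->sps only lists the
 * segment/page size combinations the host can really provide. For
 * example, with HV KVM and guest RAM backed by 4K host pages, the 64K and
 * 16M encodings fail kvm_valid_page_size() and are dropped, leaving only
 * the 4K entries.
 */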
460 #else /* defined (TARGET_PPC64) */
462 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
466 #endif /* !defined (TARGET_PPC64) */
468 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
470 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
473 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
474 * book3s supports only 1 watchpoint, so an array size
475 * of 4 is sufficient for now.
477 #define MAX_HW_BKPTS 4
479 static struct HWBreakpoint {
482 } hw_debug_points[MAX_HW_BKPTS];
484 static CPUWatchpoint hw_watchpoint;
486 /* By default no breakpoints or watchpoints are supported */
487 static int max_hw_breakpoint;
488 static int max_hw_watchpoint;
489 static int nb_hw_breakpoint;
490 static int nb_hw_watchpoint;
492 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
494 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
495 max_hw_breakpoint = 2;
496 max_hw_watchpoint = 2;
499 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
500 fprintf(stderr, "Error initializing h/w breakpoints\n");
505 int kvm_arch_init_vcpu(CPUState *cs)
507 PowerPCCPU *cpu = POWERPC_CPU(cs);
508 CPUPPCState *cenv = &cpu->env;
511 /* Gather server mmu info from KVM and update the CPU state */
512 kvm_fixup_page_sizes(cpu);
514 /* Synchronize sregs with kvm */
515 ret = kvm_arch_sync_sregs(cpu);
517 if (ret == -EINVAL) {
518 error_report("Register sync failed... If you're using kvm-hv.ko,"
519 " only \"-cpu host\" is possible");
524 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
526 /* Some targets support access to KVM's guest TLB. */
527 switch (cenv->mmu_model) {
528 case POWERPC_MMU_BOOKE206:
529 ret = kvm_booke206_tlb_init(cpu);
535 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
536 kvmppc_hw_debug_points_init(cenv);
541 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
543 CPUPPCState *env = &cpu->env;
544 CPUState *cs = CPU(cpu);
545 struct kvm_dirty_tlb dirty_tlb;
546 unsigned char *bitmap;
549 if (!env->kvm_sw_tlb) {
553 bitmap = g_malloc((env->nb_tlb + 7) / 8);
554 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
556 dirty_tlb.bitmap = (uintptr_t)bitmap;
557 dirty_tlb.num_dirty = env->nb_tlb;
559 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
561 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
562 __func__, strerror(-ret));
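/*
 * Note: the bitmap above is (nb_tlb + 7) / 8 bytes with every bit set,
 * i.e. kvm_sw_tlb_put() always marks the complete TLB dirty so that
 * KVM_DIRTY_TLB re-reads the whole shared array instead of tracking
 * individual entries.
 */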
568 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
570 PowerPCCPU *cpu = POWERPC_CPU(cs);
571 CPUPPCState *env = &cpu->env;
576 struct kvm_one_reg reg = {
578 .addr = (uintptr_t) &val,
582 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
584 trace_kvm_failed_spr_get(spr, strerror(errno));
586 switch (id & KVM_REG_SIZE_MASK) {
587 case KVM_REG_SIZE_U32:
588 env->spr[spr] = val.u32;
591 case KVM_REG_SIZE_U64:
592 env->spr[spr] = val.u64;
596 /* Don't handle this size yet */
602 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
604 PowerPCCPU *cpu = POWERPC_CPU(cs);
605 CPUPPCState *env = &cpu->env;
610 struct kvm_one_reg reg = {
612 .addr = (uintptr_t) &val,
616 switch (id & KVM_REG_SIZE_MASK) {
617 case KVM_REG_SIZE_U32:
618 val.u32 = env->spr[spr];
621 case KVM_REG_SIZE_U64:
622 val.u64 = env->spr[spr];
626 /* Don't handle this size yet */
630 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
632 trace_kvm_failed_spr_set(spr, strerror(errno));
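/*
 * Usage sketch (the real calls appear in kvm_arch_put_registers() and
 * kvm_arch_get_registers() below):
 *
 *     kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 *     kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 *
 * The ONE_REG id encodes the register width, which is why both helpers
 * switch on KVM_REG_SIZE_MASK to pick the u32 or u64 member of the union.
 */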
636 static int kvm_put_fp(CPUState *cs)
638 PowerPCCPU *cpu = POWERPC_CPU(cs);
639 CPUPPCState *env = &cpu->env;
640 struct kvm_one_reg reg;
644 if (env->insns_flags & PPC_FLOAT) {
645 uint64_t fpscr = env->fpscr;
646 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
648 reg.id = KVM_REG_PPC_FPSCR;
649 reg.addr = (uintptr_t)&fpscr;
650 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
652 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
656 for (i = 0; i < 32; i++) {
659 #ifdef HOST_WORDS_BIGENDIAN
660 vsr[0] = float64_val(env->fpr[i]);
661 vsr[1] = env->vsr[i];
663 vsr[0] = env->vsr[i];
664 vsr[1] = float64_val(env->fpr[i]);
666 reg.addr = (uintptr_t) &vsr;
667 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
669 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
671 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
678 if (env->insns_flags & PPC_ALTIVEC) {
679 reg.id = KVM_REG_PPC_VSCR;
680 reg.addr = (uintptr_t)&env->vscr;
681 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
683 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
687 for (i = 0; i < 32; i++) {
688 reg.id = KVM_REG_PPC_VR(i);
689 reg.addr = (uintptr_t)&env->avr[i];
690 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
692 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
701 static int kvm_get_fp(CPUState *cs)
703 PowerPCCPU *cpu = POWERPC_CPU(cs);
704 CPUPPCState *env = &cpu->env;
705 struct kvm_one_reg reg;
709 if (env->insns_flags & PPC_FLOAT) {
711 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
713 reg.id = KVM_REG_PPC_FPSCR;
714 reg.addr = (uintptr_t)&fpscr;
715 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
717 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
723 for (i = 0; i < 32; i++) {
726 reg.addr = (uintptr_t) &vsr;
727 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
729 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
731 DPRINTF("Unable to get %s%d from KVM: %s\n",
732 vsx ? "VSR" : "FPR", i, strerror(errno));
735 #ifdef HOST_WORDS_BIGENDIAN
736 env->fpr[i] = vsr[0];
738 env->vsr[i] = vsr[1];
741 env->fpr[i] = vsr[1];
743 env->vsr[i] = vsr[0];
750 if (env->insns_flags & PPC_ALTIVEC) {
751 reg.id = KVM_REG_PPC_VSCR;
752 reg.addr = (uintptr_t)&env->vscr;
753 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
755 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
759 for (i = 0; i < 32; i++) {
760 reg.id = KVM_REG_PPC_VR(i);
761 reg.addr = (uintptr_t)&env->avr[i];
762 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
764 DPRINTF("Unable to get VR%d from KVM: %s\n",
774 #if defined(TARGET_PPC64)
775 static int kvm_get_vpa(CPUState *cs)
777 PowerPCCPU *cpu = POWERPC_CPU(cs);
778 CPUPPCState *env = &cpu->env;
779 struct kvm_one_reg reg;
782 reg.id = KVM_REG_PPC_VPA_ADDR;
783 reg.addr = (uintptr_t)&env->vpa_addr;
784 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
786 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
790 assert((uintptr_t)&env->slb_shadow_size
791 == ((uintptr_t)&env->slb_shadow_addr + 8));
792 reg.id = KVM_REG_PPC_VPA_SLB;
793 reg.addr = (uintptr_t)&env->slb_shadow_addr;
794 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
796 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
801 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
802 reg.id = KVM_REG_PPC_VPA_DTL;
803 reg.addr = (uintptr_t)&env->dtl_addr;
804 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
806 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
814 static int kvm_put_vpa(CPUState *cs)
816 PowerPCCPU *cpu = POWERPC_CPU(cs);
817 CPUPPCState *env = &cpu->env;
818 struct kvm_one_reg reg;
821 /* SLB shadow or DTL can't be registered unless a master VPA is
822 * registered. That means when restoring state, if a VPA *is*
823 * registered, we need to set that up first. If not, we need to
824 * deregister the others before deregistering the master VPA */
825 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
828 reg.id = KVM_REG_PPC_VPA_ADDR;
829 reg.addr = (uintptr_t)&env->vpa_addr;
830 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
832 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
837 assert((uintptr_t)&env->slb_shadow_size
838 == ((uintptr_t)&env->slb_shadow_addr + 8));
839 reg.id = KVM_REG_PPC_VPA_SLB;
840 reg.addr = (uintptr_t)&env->slb_shadow_addr;
841 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
843 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
847 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
848 reg.id = KVM_REG_PPC_VPA_DTL;
849 reg.addr = (uintptr_t)&env->dtl_addr;
850 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
852 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
857 if (!env->vpa_addr) {
858 reg.id = KVM_REG_PPC_VPA_ADDR;
859 reg.addr = (uintptr_t)&env->vpa_addr;
860 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
862 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
869 #endif /* TARGET_PPC64 */
871 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
873 CPUPPCState *env = &cpu->env;
874 struct kvm_sregs sregs;
877 sregs.pvr = env->spr[SPR_PVR];
879 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
883 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
884 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
885 if (env->slb[i].esid & SLB_ESID_V) {
886 sregs.u.s.ppc64.slb[i].slbe |= i;
888 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
893 for (i = 0; i < 16; i++) {
894 sregs.u.s.ppc32.sr[i] = env->sr[i];
898 for (i = 0; i < 8; i++) {
899 /* Beware. We have to swap upper and lower bits here */
900 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
902 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
906 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
909 int kvm_arch_put_registers(CPUState *cs, int level)
911 PowerPCCPU *cpu = POWERPC_CPU(cs);
912 CPUPPCState *env = &cpu->env;
913 struct kvm_regs regs;
917 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
924 regs.xer = cpu_read_xer(env);
928 regs.srr0 = env->spr[SPR_SRR0];
929 regs.srr1 = env->spr[SPR_SRR1];
931 regs.sprg0 = env->spr[SPR_SPRG0];
932 regs.sprg1 = env->spr[SPR_SPRG1];
933 regs.sprg2 = env->spr[SPR_SPRG2];
934 regs.sprg3 = env->spr[SPR_SPRG3];
935 regs.sprg4 = env->spr[SPR_SPRG4];
936 regs.sprg5 = env->spr[SPR_SPRG5];
937 regs.sprg6 = env->spr[SPR_SPRG6];
938 regs.sprg7 = env->spr[SPR_SPRG7];
940 regs.pid = env->spr[SPR_BOOKE_PID];
942 for (i = 0;i < 32; i++)
943 regs.gpr[i] = env->gpr[i];
946 for (i = 0; i < 8; i++) {
947 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
950 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
956 if (env->tlb_dirty) {
958 env->tlb_dirty = false;
961 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
962 ret = kvmppc_put_books_sregs(cpu);
968 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
969 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
975 /* We deliberately ignore errors here: for kernels which have
976 * the ONE_REG calls but don't support the specific
977 * registers, there's a reasonable chance things will still
978 * work, at least until we try to migrate. */
979 for (i = 0; i < 1024; i++) {
980 uint64_t id = env->spr_cb[i].one_reg_id;
983 kvm_put_one_spr(cs, id, i);
989 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
992 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
993 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
995 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
996 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
997 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
998 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
999 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1000 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1001 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1002 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1004 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1008 if (kvm_put_vpa(cs) < 0) {
1009 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1013 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1014 #endif /* TARGET_PPC64 */
1020 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1022 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1025 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1027 CPUPPCState *env = &cpu->env;
1028 struct kvm_sregs sregs;
1031 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1036 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1037 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1038 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1039 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1040 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1041 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1042 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1043 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1044 env->spr[SPR_DECR] = sregs.u.e.dec;
1045 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1046 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1047 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1050 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1051 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1052 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1053 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1054 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1055 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1058 if (sregs.u.e.features & KVM_SREGS_E_64) {
1059 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1062 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1063 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1066 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1067 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1068 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1069 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1070 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1071 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1072 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1073 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1074 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1075 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1076 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1077 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1078 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1079 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1080 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1081 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1082 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1083 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1084 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1085 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1086 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1087 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1088 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1089 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1090 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1091 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1092 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1093 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1094 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1095 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1096 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1097 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1098 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1100 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1101 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1102 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1103 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1104 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1105 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1106 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1109 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1110 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1111 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1114 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1115 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1116 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1117 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1118 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1122 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1123 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1124 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1125 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1126 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1127 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1128 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1129 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1130 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1131 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1132 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1135 if (sregs.u.e.features & KVM_SREGS_EXP) {
1136 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1139 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1140 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1141 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1144 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1145 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1146 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1147 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1149 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1150 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1151 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1158 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1160 CPUPPCState *env = &cpu->env;
1161 struct kvm_sregs sregs;
1165 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1170 if (!env->external_htab) {
1171 ppc_store_sdr1(env, sregs.u.s.sdr1);
1177 * The packed SLB array we get from KVM_GET_SREGS only contains
1178 * information about valid entries. So we flush our internal copy
1179 * to get rid of stale ones, then put all valid SLB entries back
1182 memset(env->slb, 0, sizeof(env->slb));
1183 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1184 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1185 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1187 * Only restore valid entries
1189 if (rb & SLB_ESID_V) {
1190 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1196 for (i = 0; i < 16; i++) {
1197 env->sr[i] = sregs.u.s.ppc32.sr[i];
1201 for (i = 0; i < 8; i++) {
1202 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1203 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1204 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1205 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1211 int kvm_arch_get_registers(CPUState *cs)
1213 PowerPCCPU *cpu = POWERPC_CPU(cs);
1214 CPUPPCState *env = &cpu->env;
1215 struct kvm_regs regs;
1219 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1224 for (i = 7; i >= 0; i--) {
1225 env->crf[i] = cr & 15;
1229 env->ctr = regs.ctr;
1231 cpu_write_xer(env, regs.xer);
1232 env->msr = regs.msr;
1235 env->spr[SPR_SRR0] = regs.srr0;
1236 env->spr[SPR_SRR1] = regs.srr1;
1238 env->spr[SPR_SPRG0] = regs.sprg0;
1239 env->spr[SPR_SPRG1] = regs.sprg1;
1240 env->spr[SPR_SPRG2] = regs.sprg2;
1241 env->spr[SPR_SPRG3] = regs.sprg3;
1242 env->spr[SPR_SPRG4] = regs.sprg4;
1243 env->spr[SPR_SPRG5] = regs.sprg5;
1244 env->spr[SPR_SPRG6] = regs.sprg6;
1245 env->spr[SPR_SPRG7] = regs.sprg7;
1247 env->spr[SPR_BOOKE_PID] = regs.pid;
1249 for (i = 0;i < 32; i++)
1250 env->gpr[i] = regs.gpr[i];
1254 if (cap_booke_sregs) {
1255 ret = kvmppc_get_booke_sregs(cpu);
1262 ret = kvmppc_get_books_sregs(cpu);
1269 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1275 /* We deliberately ignore errors here: for kernels which have
1276 * the ONE_REG calls but don't support the specific
1277 * registers, there's a reasonable chance things will still
1278 * work, at least until we try to migrate. */
1279 for (i = 0; i < 1024; i++) {
1280 uint64_t id = env->spr_cb[i].one_reg_id;
1283 kvm_get_one_spr(cs, id, i);
1289 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1292 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1293 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1295 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1296 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1297 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1298 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1299 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1300 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1301 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1302 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1303 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1304 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1308 if (kvm_get_vpa(cs) < 0) {
1309 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1313 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1320 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1322 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1324 if (irq != PPC_INTERRUPT_EXT) {
1328 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1332 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1337 #if defined(TARGET_PPCEMB)
1338 #define PPC_INPUT_INT PPC40x_INPUT_INT
1339 #elif defined(TARGET_PPC64)
1340 #define PPC_INPUT_INT PPC970_INPUT_INT
1342 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1345 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1347 PowerPCCPU *cpu = POWERPC_CPU(cs);
1348 CPUPPCState *env = &cpu->env;
1352 qemu_mutex_lock_iothread();
1354 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1355 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1356 if (!cap_interrupt_level &&
1357 run->ready_for_interrupt_injection &&
1358 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1359 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1361 /* For now KVM disregards the 'irq' argument. However, in the
1362 * future KVM could cache it in-kernel to avoid a heavyweight exit
1363 * when reading the UIC.
1365 irq = KVM_INTERRUPT_SET;
1367 DPRINTF("injected interrupt %d\n", irq);
1368 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1370 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1373 /* Always wake up soon in case the interrupt was level based */
1374 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1375 (NANOSECONDS_PER_SECOND / 50));
1378 /* We don't know if there are more interrupts pending after this. However,
1379 * the guest will return to userspace in the course of handling this one
1380 * anyway, so we will get a chance to deliver the rest. */
1382 qemu_mutex_unlock_iothread();
1385 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1387 return MEMTXATTRS_UNSPECIFIED;
1390 int kvm_arch_process_async_events(CPUState *cs)
1395 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1397 CPUState *cs = CPU(cpu);
1398 CPUPPCState *env = &cpu->env;
1400 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1402 cs->exception_index = EXCP_HLT;
1408 /* map dcr access to existing qemu dcr emulation */
1409 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1411 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1412 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1417 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1419 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1420 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1425 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1427 /* Mixed endian case is not handled */
1428 uint32_t sc = debug_inst_opcode;
1430 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1432 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1439 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1443 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1444 sc != debug_inst_opcode ||
1445 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1453 static int find_hw_breakpoint(target_ulong addr, int type)
1457 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1458 <= ARRAY_SIZE(hw_debug_points));
1460 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1461 if (hw_debug_points[n].addr == addr &&
1462 hw_debug_points[n].type == type) {
1470 static int find_hw_watchpoint(target_ulong addr, int *flag)
1474 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1476 *flag = BP_MEM_ACCESS;
1480 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1482 *flag = BP_MEM_WRITE;
1486 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1488 *flag = BP_MEM_READ;
1495 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1496 target_ulong len, int type)
1498 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1502 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1503 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1506 case GDB_BREAKPOINT_HW:
1507 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1511 if (find_hw_breakpoint(addr, type) >= 0) {
1518 case GDB_WATCHPOINT_WRITE:
1519 case GDB_WATCHPOINT_READ:
1520 case GDB_WATCHPOINT_ACCESS:
1521 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1525 if (find_hw_breakpoint(addr, type) >= 0) {
1539 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1540 target_ulong len, int type)
1544 n = find_hw_breakpoint(addr, type);
1550 case GDB_BREAKPOINT_HW:
1554 case GDB_WATCHPOINT_WRITE:
1555 case GDB_WATCHPOINT_READ:
1556 case GDB_WATCHPOINT_ACCESS:
1563 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
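/*
 * Note: removal keeps hw_debug_points dense by copying the last active
 * entry into the slot that was just freed, once the matching
 * nb_hw_breakpoint/nb_hw_watchpoint counter has been decremented above.
 */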
1568 void kvm_arch_remove_all_hw_breakpoints(void)
1570 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1573 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1577 /* Software Breakpoint updates */
1578 if (kvm_sw_breakpoints_active(cs)) {
1579 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1582 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1583 <= ARRAY_SIZE(hw_debug_points));
1584 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1586 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1587 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1588 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1589 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1590 switch (hw_debug_points[n].type) {
1591 case GDB_BREAKPOINT_HW:
1592 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1594 case GDB_WATCHPOINT_WRITE:
1595 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1597 case GDB_WATCHPOINT_READ:
1598 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1600 case GDB_WATCHPOINT_ACCESS:
1601 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1602 KVMPPC_DEBUG_WATCH_READ;
1605 cpu_abort(cs, "Unsupported breakpoint type\n");
1607 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1612 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1614 CPUState *cs = CPU(cpu);
1615 CPUPPCState *env = &cpu->env;
1616 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1621 if (cs->singlestep_enabled) {
1623 } else if (arch_info->status) {
1624 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1625 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1626 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1630 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1631 KVMPPC_DEBUG_WATCH_WRITE)) {
1632 n = find_hw_watchpoint(arch_info->address, &flag);
1635 cs->watchpoint_hit = &hw_watchpoint;
1636 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1637 hw_watchpoint.flags = flag;
1641 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1644 /* QEMU is not able to handle the debug exception, so inject a
1645 * program exception into the guest instead;
1646 * yes, a program exception, NOT a debug exception !!
1647 * When QEMU is using the debug resources then the debug exception must
1648 * always be set. To achieve this we set MSR_DE and also set
1649 * MSRP_DEP so the guest cannot change MSR_DE.
1650 * When emulating debug resources for the guest we want the guest
1651 * to control MSR_DE (enable/disable the debug interrupt as needed).
1652 * Supporting both configurations at once is NOT possible,
1653 * so the result is that we cannot share debug resources
1654 * between QEMU and the guest on the BookE architecture.
1655 * In the current design QEMU gets priority over the guest:
1656 * if QEMU is using the debug resources then the guest cannot use them.
1658 * For software breakpoints QEMU uses a privileged instruction, so
1659 * there is no way we can be here because the guest raised a
1660 * debug exception; the only possibility is that the guest executed a
1661 * privileged / illegal instruction, and that is why we are
1662 * injecting a program interrupt.
1665 cpu_synchronize_state(cs);
1666 /* env->nip is PC, so increment this by 4 to use
1667 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1670 cs->exception_index = POWERPC_EXCP_PROGRAM;
1671 env->error_code = POWERPC_EXCP_INVAL;
1672 ppc_cpu_do_interrupt(cs);
1678 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1680 PowerPCCPU *cpu = POWERPC_CPU(cs);
1681 CPUPPCState *env = &cpu->env;
1684 qemu_mutex_lock_iothread();
1686 switch (run->exit_reason) {
1688 if (run->dcr.is_write) {
1689 DPRINTF("handle dcr write\n");
1690 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1692 DPRINTF("handle dcr read\n");
1693 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1697 DPRINTF("handle halt\n");
1698 ret = kvmppc_handle_halt(cpu);
1700 #if defined(TARGET_PPC64)
1701 case KVM_EXIT_PAPR_HCALL:
1702 DPRINTF("handle PAPR hypercall\n");
1703 run->papr_hcall.ret = spapr_hypercall(cpu,
1705 run->papr_hcall.args);
1710 DPRINTF("handle epr\n");
1711 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1714 case KVM_EXIT_WATCHDOG:
1715 DPRINTF("handle watchdog expiry\n");
1716 watchdog_perform_action();
1720 case KVM_EXIT_DEBUG:
1721 DPRINTF("handle debug exception\n");
1722 if (kvm_handle_debug(cpu, run)) {
1726 /* re-enter, this exception was guest-internal */
1731 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1736 qemu_mutex_unlock_iothread();
1740 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1742 CPUState *cs = CPU(cpu);
1743 uint32_t bits = tsr_bits;
1744 struct kvm_one_reg reg = {
1745 .id = KVM_REG_PPC_OR_TSR,
1746 .addr = (uintptr_t) &bits,
1749 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1752 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1755 CPUState *cs = CPU(cpu);
1756 uint32_t bits = tsr_bits;
1757 struct kvm_one_reg reg = {
1758 .id = KVM_REG_PPC_CLEAR_TSR,
1759 .addr = (uintptr_t) &bits,
1762 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1765 int kvmppc_set_tcr(PowerPCCPU *cpu)
1767 CPUState *cs = CPU(cpu);
1768 CPUPPCState *env = &cpu->env;
1769 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1771 struct kvm_one_reg reg = {
1772 .id = KVM_REG_PPC_TCR,
1773 .addr = (uintptr_t) &tcr,
1776 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1779 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1781 CPUState *cs = CPU(cpu);
1784 if (!kvm_enabled()) {
1788 if (!cap_ppc_watchdog) {
1789 printf("warning: KVM does not support watchdog");
1793 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1795 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1796 __func__, strerror(-ret));
1803 static int read_cpuinfo(const char *field, char *value, int len)
1807 int field_len = strlen(field);
1810 f = fopen("/proc/cpuinfo", "r");
1816 if (!fgets(line, sizeof(line), f)) {
1819 if (!strncmp(line, field, field_len)) {
1820 pstrcpy(value, len, line);
1831 uint32_t kvmppc_get_tbfreq(void)
1835 uint32_t retval = NANOSECONDS_PER_SECOND;
1837 if (read_cpuinfo("timebase", line, sizeof(line))) {
1841 if (!(ns = strchr(line, ':'))) {
1850 bool kvmppc_get_host_serial(char **value)
1852 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1856 bool kvmppc_get_host_model(char **value)
1858 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1861 /* Try to find a device tree node for a CPU with clock-frequency property */
1862 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1864 struct dirent *dirp;
1867 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1868 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1873 while ((dirp = readdir(dp)) != NULL) {
1875 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1877 f = fopen(buf, "r");
1879 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1886 if (buf[0] == '\0') {
1887 printf("Unknown host!\n");
1894 static uint64_t kvmppc_read_int_dt(const char *filename)
1903 f = fopen(filename, "rb");
1908 len = fread(&u, 1, sizeof(u), f);
1912 /* property is a 32-bit quantity */
1913 return be32_to_cpu(u.v32);
1915 return be64_to_cpu(u.v64);
1921 /* Read a CPU node property from the host device tree that's a single
1922 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1923 * (can't find or open the property, or doesn't understand the
1925 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1927 char buf[PATH_MAX], *tmp;
1930 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1934 tmp = g_strdup_printf("%s/%s", buf, propname);
1935 val = kvmppc_read_int_dt(tmp);
1941 uint64_t kvmppc_get_clockfreq(void)
1943 return kvmppc_read_int_cpu_dt("clock-frequency");
1946 uint32_t kvmppc_get_vmx(void)
1948 return kvmppc_read_int_cpu_dt("ibm,vmx");
1951 uint32_t kvmppc_get_dfp(void)
1953 return kvmppc_read_int_cpu_dt("ibm,dfp");
1956 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1958 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1959 CPUState *cs = CPU(cpu);
1961 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1962 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1969 int kvmppc_get_hasidle(CPUPPCState *env)
1971 struct kvm_ppc_pvinfo pvinfo;
1973 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1974 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1981 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1983 uint32_t *hc = (uint32_t*)buf;
1984 struct kvm_ppc_pvinfo pvinfo;
1986 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1987 memcpy(buf, pvinfo.hcall, buf_len);
1992 * Fallback to always fail hypercalls regardless of endianness:
1994 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1996 * b .+8 (becomes nop in wrong endian)
1997 * bswap32(li r3, -1)
2000 hc[0] = cpu_to_be32(0x08000048);
2001 hc[1] = cpu_to_be32(0x3860ffff);
2002 hc[2] = cpu_to_be32(0x48000008);
2003 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
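/*
 * Walk-through of the fallback sequence above: a guest of the "right"
 * endianness executes
 *
 *     tdi 0,r0,72      ; TO field is 0, never traps, behaves as a nop
 *     li  r3,-1        ; hypercall result = -1
 *     b   .+8          ; skip the byte-swapped word below
 *
 * while a guest of the opposite endianness decodes word 0 as "b .+8",
 * skips the first "li r3,-1", executes word 2 as the harmless tdi and
 * then reaches the byte-swapped "li r3,-1" in word 3. Either way the
 * hypercall fails cleanly with -1 in r3.
 */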
2008 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2010 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2013 void kvmppc_enable_logical_ci_hcalls(void)
2016 * FIXME: it would be nice if we could detect the cases where
2017 * we're using a device which requires the in-kernel
2018 * implementation of these hcalls but the kernel lacks them, and
2019 * produce a warning in that case.
2021 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2022 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2025 void kvmppc_enable_set_mode_hcall(void)
2027 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2030 void kvmppc_set_papr(PowerPCCPU *cpu)
2032 CPUState *cs = CPU(cpu);
2035 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2037 error_report("This vCPU type or KVM version does not support PAPR");
2041 /* Update the capability flag so we sync the right information
2046 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2048 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2051 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2053 CPUState *cs = CPU(cpu);
2056 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2057 if (ret && mpic_proxy) {
2058 error_report("This KVM version does not support EPR");
2063 int kvmppc_smt_threads(void)
2065 return cap_ppc_smt ? cap_ppc_smt : 1;
2069 off_t kvmppc_alloc_rma(void **rma)
2073 struct kvm_allocate_rma ret;
2075 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2076 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2077 * not necessary on this hardware
2078 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2080 * FIXME: We should allow the user to force contiguous RMA
2081 * allocation in the cap_ppc_rma==1 case.
2083 if (cap_ppc_rma < 2) {
2087 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2089 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2094 size = MIN(ret.rma_size, 256ul << 20);
2096 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2097 if (*rma == MAP_FAILED) {
2098 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2105 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2107 struct kvm_ppc_smmu_info info;
2108 long rampagesize, best_page_shift;
2111 if (cap_ppc_rma >= 2) {
2112 return current_size;
2115 /* Find the largest hardware supported page size that's less than
2116 * or equal to the (logical) backing page size of guest RAM */
2117 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2118 rampagesize = getrampagesize();
2119 best_page_shift = 0;
2121 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2122 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2124 if (!sps->page_shift) {
2128 if ((sps->page_shift > best_page_shift)
2129 && ((1UL << sps->page_shift) <= rampagesize)) {
2130 best_page_shift = sps->page_shift;
2134 return MIN(current_size,
2135 1ULL << (best_page_shift + hash_shift - 7));
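/*
 * Worked example: with 4K as the best matching page size
 * (best_page_shift == 12) and a 16MB hash table (hash_shift == 24), the
 * cap is 1ULL << (12 + 24 - 7) = 512 MiB, so the RMA becomes the smaller
 * of current_size and 512 MiB.
 */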
2139 bool kvmppc_spapr_use_multitce(void)
2141 return cap_spapr_multitce;
2144 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2147 struct kvm_create_spapr_tce args = {
2149 .window_size = window_size,
2155 /* Must set fd to -1 so we don't try to munmap when called for
2156 * destroying the table, which the upper layers -will- do
2159 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2163 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2165 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2170 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2171 /* FIXME: round this up to page size */
2173 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2174 if (table == MAP_FAILED) {
2175 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2185 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2193 len = nb_table * sizeof(uint64_t);
2194 if ((munmap(table, len) < 0) ||
2196 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2198 /* Leak the table */
2204 int kvmppc_reset_htab(int shift_hint)
2206 uint32_t shift = shift_hint;
2208 if (!kvm_enabled()) {
2209 /* Full emulation, tell caller to allocate htab itself */
2212 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2214 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2215 if (ret == -ENOTTY) {
2216 /* At least some versions of PR KVM advertise the
2217 * capability, but don't implement the ioctl(). Oops.
2218 * Return 0 so that we allocate the htab in qemu, as is
2219 * correct for PR. */
2221 } else if (ret < 0) {
2227 /* We have a kernel that predates the htab reset calls. For PR
2228 * KVM, we need to allocate the htab ourselves; an HV KVM of
2229 * this era has already allocated a 16MB fixed size hash table.
2230 * Kernels of this era have the GET_PVINFO capability
2231 * only on PR, so we use this hack to determine the right
2233 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2234 /* PR - tell caller to allocate htab */
2237 /* HV - assume 16MB kernel allocated htab */
2242 static inline uint32_t mfpvr(void)
2251 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2260 static void kvmppc_host_cpu_initfn(Object *obj)
2262 assert(kvm_enabled());
2265 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2267 DeviceClass *dc = DEVICE_CLASS(oc);
2268 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2269 uint32_t vmx = kvmppc_get_vmx();
2270 uint32_t dfp = kvmppc_get_dfp();
2271 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2272 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2274 /* Now fix up the class with information we can query from the host */
2278 /* Only override when we know what the host supports */
2279 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2280 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2283 /* Only override when we know what the host supports */
2284 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2287 if (dcache_size != -1) {
2288 pcc->l1_dcache_size = dcache_size;
2291 if (icache_size != -1) {
2292 pcc->l1_icache_size = icache_size;
2295 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2296 dc->cannot_destroy_with_object_finalize_yet = true;
2299 bool kvmppc_has_cap_epr(void)
2304 bool kvmppc_has_cap_htab_fd(void)
2309 bool kvmppc_has_cap_fixup_hcalls(void)
2311 return cap_fixup_hcalls;
2314 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2316 ObjectClass *oc = OBJECT_CLASS(pcc);
2318 while (oc && !object_class_is_abstract(oc)) {
2319 oc = object_class_get_parent(oc);
2323 return POWERPC_CPU_CLASS(oc);
2326 static int kvm_ppc_register_host_cpu_type(void)
2328 TypeInfo type_info = {
2329 .name = TYPE_HOST_POWERPC_CPU,
2330 .instance_init = kvmppc_host_cpu_initfn,
2331 .class_init = kvmppc_host_cpu_class_init,
2333 uint32_t host_pvr = mfpvr();
2334 PowerPCCPUClass *pvr_pcc;
2337 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2338 if (pvr_pcc == NULL) {
2339 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2341 if (pvr_pcc == NULL) {
2344 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2345 type_register(&type_info);
2347 /* Also register a generic CPU class for this CPU family */
2348 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2349 dc = DEVICE_CLASS(pvr_pcc);
2350 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2351 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2352 type_register(&type_info);
2357 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2359 struct kvm_rtas_token_args args = {
2363 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2367 strncpy(args.name, function, sizeof(args.name));
2369 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2372 int kvmppc_get_htab_fd(bool write)
2374 struct kvm_get_htab_fd s = {
2375 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2380 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2384 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2387 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2389 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2390 uint8_t buf[bufsize];
2394 rc = read(fd, buf, bufsize);
2396 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2400 uint8_t *buffer = buf;
2403 struct kvm_get_htab_header *head =
2404 (struct kvm_get_htab_header *) buffer;
2405 size_t chunksize = sizeof(*head) +
2406 HASH_PTE_SIZE_64 * head->n_valid;
2408 qemu_put_be32(f, head->index);
2409 qemu_put_be16(f, head->n_valid);
2410 qemu_put_be16(f, head->n_invalid);
2411 qemu_put_buffer(f, (void *)(head + 1),
2412 HASH_PTE_SIZE_64 * head->n_valid);
2414 buffer += chunksize;
2420 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2422 return (rc == 0) ? 1 : 0;
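/*
 * Stream layout note: each chunk written above is a 32-bit index, a
 * 16-bit n_valid and a 16-bit n_invalid, followed by n_valid HPTEs of
 * HASH_PTE_SIZE_64 bytes each. kvmppc_load_htab_chunk() below rebuilds
 * exactly this layout in a kvm_get_htab_header before handing it back to
 * the kernel HTAB fd with write().
 */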
2425 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2426 uint16_t n_valid, uint16_t n_invalid)
2428 struct kvm_get_htab_header *buf;
2429 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2432 buf = alloca(chunksize);
2434 buf->n_valid = n_valid;
2435 buf->n_invalid = n_invalid;
2437 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2439 rc = write(fd, buf, chunksize);
2441 fprintf(stderr, "Error writing KVM hash table: %s\n",
2445 if (rc != chunksize) {
2446 /* We should never get a short write on a single chunk */
2447 fprintf(stderr, "Short write, restoring KVM hash table\n");
2453 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2458 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2463 int kvm_arch_on_sigbus(int code, void *addr)
2468 void kvm_arch_init_irq_routing(KVMState *s)
2472 struct kvm_get_htab_buf {
2473 struct kvm_get_htab_header header;
2475 * We require one extra byte for read
2477 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2480 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2483 struct kvm_get_htab_fd ghf;
2484 struct kvm_get_htab_buf *hpte_buf;
2487 ghf.start_index = pte_index;
2488 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2493 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2495 * Read the hpte group
2497 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2502 return (uint64_t)(uintptr_t) hpte_buf->hpte;
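/*
 * Lifetime note: the token returned above is really a pointer into the
 * g_malloc0()'d kvm_get_htab_buf, so every successful call must be paired
 * with kvmppc_hash64_free_pteg() below, which recovers the containing
 * buffer with container_of() and frees it.
 */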
2511 void kvmppc_hash64_free_pteg(uint64_t token)
2513 struct kvm_get_htab_buf *htab_buf;
2515 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2521 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2522 target_ulong pte0, target_ulong pte1)
2525 struct kvm_get_htab_fd ghf;
2526 struct kvm_get_htab_buf hpte_buf;
2529 ghf.start_index = 0; /* Ignored */
2530 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2535 hpte_buf.header.n_valid = 1;
2536 hpte_buf.header.n_invalid = 0;
2537 hpte_buf.header.index = pte_index;
2538 hpte_buf.hpte[0] = pte0;
2539 hpte_buf.hpte[1] = pte1;
2541 * Write the hpte entry.
2542 * CAUTION: write() has the warn_unused_result attribute. Hence we
2543 * need to check the return value, even though we do nothing.
2545 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2557 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2558 uint64_t address, uint32_t data, PCIDevice *dev)
2563 int kvm_arch_msi_data_to_gsi(uint32_t data)
2565 return data & 0xffff;
2568 int kvmppc_enable_hwrng(void)
2570 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2574 return kvmppc_enable_hcall(kvm_state, H_RANDOM);