target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <[email protected]>
   9  *  Christian Ehrhardt <[email protected]>
  10  *  Hollis Blanchard <[email protected]>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qapi/error.h"
  26 #include "qemu/error-report.h"
  27 #include "cpu.h"
  28 #include "cpu-models.h"
  29 #include "qemu/timer.h"
  30 #include "sysemu/sysemu.h"
  31 #include "sysemu/hw_accel.h"
  32 #include "kvm_ppc.h"
  33 #include "sysemu/cpus.h"
  34 #include "sysemu/device_tree.h"
  35 #include "mmu-hash64.h"
  36
  37 #include "hw/sysbus.h"
  38 #include "hw/ppc/spapr.h"
  39 #include "hw/ppc/spapr_vio.h"
  40 #include "hw/ppc/spapr_cpu_core.h"
  41 #include "hw/ppc/ppc.h"
  42 #include "sysemu/watchdog.h"
  43 #include "trace.h"
  44 #include "exec/gdbstub.h"
  45 #include "exec/memattrs.h"
  46 #include "exec/ram_addr.h"
  47 #include "sysemu/hostmem.h"
  48 #include "qemu/cutils.h"
  49 #include "qemu/mmap-alloc.h"
  50 #include "elf.h"
  51 #include "sysemu/kvm_int.h"
  52
  53 //#define DEBUG_KVM
  54
  55 #ifdef DEBUG_KVM
  56 #define DPRINTF(fmt, ...) \
  57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  58 #else
  59 #define DPRINTF(fmt, ...) \
  60     do { } while (0)
  61 #endif
  62
  63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  64
  65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  66     KVM_CAP_LAST_INFO
  67 };
  68
  69 static int cap_interrupt_unset = false;
  70 static int cap_interrupt_level = false;
  71 static int cap_segstate;
  72 static int cap_booke_sregs;
  73 static int cap_ppc_smt;
  74 static int cap_ppc_smt_possible;
  75 static int cap_ppc_rma;
  76 static int cap_spapr_tce;
  77 static int cap_spapr_tce_64;
  78 static int cap_spapr_multitce;
  79 static int cap_spapr_vfio;
  80 static int cap_hior;
  81 static int cap_one_reg;
  82 static int cap_epr;
  83 static int cap_ppc_watchdog;
  84 static int cap_papr;
  85 static int cap_htab_fd;
  86 static int cap_fixup_hcalls;
  87 static int cap_htm;             /* Hardware transactional memory support */
  88 static int cap_mmu_radix;
  89 static int cap_mmu_hash_v3;
  90 static int cap_resize_hpt;
  91 static int cap_ppc_pvr_compat;
  92 static int cap_ppc_safe_cache;
  93 static int cap_ppc_safe_bounds_check;
  94 static int cap_ppc_safe_indirect_branch;
  95
  96 static uint32_t debug_inst_opcode;
  97
  98 /* XXX We have a race condition where we actually have a level triggered
  99  *     interrupt, but the infrastructure can't expose that yet, so the guest
 100  *     takes but ignores it, goes to sleep and never gets notified that there's
 101  *     still an interrupt pending.
 102  *
 *     As a quick workaround, we simply wake up again 20 ms after injecting
 *     an interrupt. That way we can ensure that interrupts are always
 *     reinjected in case the guest swallowed them.
 106  */
 107 static QEMUTimer *idle_timer;
 108
 109 static void kvm_kick_cpu(void *opaque)
 110 {
 111     PowerPCCPU *cpu = opaque;
 112
 113     qemu_cpu_kick(CPU(cpu));
 114 }
 115
 116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 117  * should only be used for fallback tests - generally we should use
 118  * explicit capabilities for the features we want, rather than
 119  * assuming what is/isn't available depending on the KVM variant. */
 120 static bool kvmppc_is_pr(KVMState *ks)
 121 {
 122     /* Assume KVM-PR if the GET_PVINFO capability is available */
 123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 124 }
 125
 126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 127 static void kvmppc_get_cpu_characteristics(KVMState *s);
 128
 129 int kvm_arch_init(MachineState *ms, KVMState *s)
 130 {
 131     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 132     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 133     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 134     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 135     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 136     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 137     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 138     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 139     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 140     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
 141     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 142     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 143     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 144     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 145     /* Note: we don't set cap_papr here, because this capability is
 146      * only activated after this by kvmppc_set_papr() */
 147     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 148     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 149     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 150     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 151     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 152     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 153     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 154     kvmppc_get_cpu_characteristics(s);
 155     /*
 156      * Note: setting it to false because there is not such capability
 157      * in KVM at this moment.
 158      *
 159      * TODO: call kvm_vm_check_extension() with the right capability
 160      * after the kernel starts implementing it.*/
 161     cap_ppc_pvr_compat = false;
 162
 163     if (!cap_interrupt_level) {
 164         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 165                         "VM to stall at times!\n");
 166     }
 167
 168     kvm_ppc_register_host_cpu_type(ms);
 169
 170     return 0;
 171 }
 172
 173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 174 {
 175     return 0;
 176 }
 177
 178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 179 {
 180     CPUPPCState *cenv = &cpu->env;
 181     CPUState *cs = CPU(cpu);
 182     struct kvm_sregs sregs;
 183     int ret;
 184
 185     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 186         /* What we're really trying to say is "if we're on BookE, we use
 187            the native PVR for now". This is the only sane way to check
 188            it though, so we potentially confuse users that they can run
 189            BookE guests on BookS. Let's hope nobody dares enough :) */
 190         return 0;
 191     } else {
 192         if (!cap_segstate) {
 193             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 194             return -ENOSYS;
 195         }
 196     }
 197
 198     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 199     if (ret) {
 200         return ret;
 201     }
 202
 203     sregs.pvr = cenv->spr[SPR_PVR];
 204     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 205 }
 206
 207 /* Set up a shared TLB array with KVM */
 208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 209 {
 210     CPUPPCState *env = &cpu->env;
 211     CPUState *cs = CPU(cpu);
 212     struct kvm_book3e_206_tlb_params params = {};
 213     struct kvm_config_tlb cfg = {};
 214     unsigned int entries = 0;
 215     int ret, i;
 216
 217     if (!kvm_enabled() ||
 218         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 219         return 0;
 220     }
 221
 222     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 223
 224     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 225         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 226         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 227         entries += params.tlb_sizes[i];
 228     }
 229
 230     assert(entries == env->nb_tlb);
 231     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 232
 233     env->tlb_dirty = true;
 234
 235     cfg.array = (uintptr_t)env->tlb.tlbm;
 236     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 237     cfg.params = (uintptr_t)&params;
 238     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 239
 240     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 241     if (ret < 0) {
 242         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 243                 __func__, strerror(-ret));
 244         return ret;
 245     }
 246
 247     env->kvm_sw_tlb = true;
 248     return 0;
 249 }
 250
 251
 252 #if defined(TARGET_PPC64)
 253 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 254                                        struct kvm_ppc_smmu_info *info)
 255 {
 256     CPUPPCState *env = &cpu->env;
 257     CPUState *cs = CPU(cpu);
 258
 259     memset(info, 0, sizeof(*info));
 260
 261     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 262      * need to "guess" what the supported page sizes are.
 263      *
 264      * For that to work we make a few assumptions:
 265      *
 266      * - Check whether we are running "PR" KVM which only supports 4K
 267      *   and 16M pages, but supports them regardless of the backing
 268      *   store characteritics. We also don't support 1T segments.
 269      *
 270      *   This is safe as if HV KVM ever supports that capability or PR
 271      *   KVM grows supports for more page/segment sizes, those versions
 272      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 273      *   will not hit this fallback
 274      *
 275      * - Else we are running HV KVM. This means we only support page
 276      *   sizes that fit in the backing store. Additionally we only
 277      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 278      *   P7 encodings for the SLB and hash table. Here too, we assume
 279      *   support for any newer processor will mean a kernel that
 280      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 281      *   this fallback.
 282      */
 283     if (kvmppc_is_pr(cs->kvm_state)) {
 284         /* No flags */
 285         info->flags = 0;
 286         info->slb_size = 64;
 287
 288         /* Standard 4k base page size segment */
 289         info->sps[0].page_shift = 12;
 290         info->sps[0].slb_enc = 0;
 291         info->sps[0].enc[0].page_shift = 12;
 292         info->sps[0].enc[0].pte_enc = 0;
 293
 294         /* Standard 16M large page size segment */
 295         info->sps[1].page_shift = 24;
 296         info->sps[1].slb_enc = SLB_VSID_L;
 297         info->sps[1].enc[0].page_shift = 24;
 298         info->sps[1].enc[0].pte_enc = 0;
 299     } else {
 300         int i = 0;
 301
 302         /* HV KVM has backing store size restrictions */
 303         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 304
 305         if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
 306             info->flags |= KVM_PPC_1T_SEGMENTS;
 307         }
 308
 309         if (env->mmu_model == POWERPC_MMU_2_06 ||
 310             env->mmu_model == POWERPC_MMU_2_07) {
 311             info->slb_size = 32;
 312         } else {
 313             info->slb_size = 64;
 314         }
 315
 316         /* Standard 4k base page size segment */
 317         info->sps[i].page_shift = 12;
 318         info->sps[i].slb_enc = 0;
 319         info->sps[i].enc[0].page_shift = 12;
 320         info->sps[i].enc[0].pte_enc = 0;
 321         i++;
 322
 323         /* 64K on MMU 2.06 and later */
 324         if (env->mmu_model == POWERPC_MMU_2_06 ||
 325             env->mmu_model == POWERPC_MMU_2_07) {
 326             info->sps[i].page_shift = 16;
 327             info->sps[i].slb_enc = 0x110;
 328             info->sps[i].enc[0].page_shift = 16;
 329             info->sps[i].enc[0].pte_enc = 1;
 330             i++;
 331         }
 332
 333         /* Standard 16M large page size segment */
 334         info->sps[i].page_shift = 24;
 335         info->sps[i].slb_enc = SLB_VSID_L;
 336         info->sps[i].enc[0].page_shift = 24;
 337         info->sps[i].enc[0].pte_enc = 0;
 338     }
 339 }
 340
 341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 342 {
 343     CPUState *cs = CPU(cpu);
 344     int ret;
 345
 346     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 347         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 348         if (ret == 0) {
 349             return;
 350         }
 351     }
 352
 353     kvm_get_fallback_smmu_info(cpu, info);
 354 }
 355
 356 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 357 {
 358     KVMState *s = KVM_STATE(current_machine->accelerator);
 359     struct ppc_radix_page_info *radix_page_info;
 360     struct kvm_ppc_rmmu_info rmmu_info;
 361     int i;
 362
 363     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 364         return NULL;
 365     }
 366     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 367         return NULL;
 368     }
 369     radix_page_info = g_malloc0(sizeof(*radix_page_info));
 370     radix_page_info->count = 0;
 371     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 372         if (rmmu_info.ap_encodings[i]) {
 373             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 374             radix_page_info->count++;
 375         }
 376     }
 377     return radix_page_info;
 378 }
 379
 380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 381                                      bool radix, bool gtse,
 382                                      uint64_t proc_tbl)
 383 {
 384     CPUState *cs = CPU(cpu);
 385     int ret;
 386     uint64_t flags = 0;
 387     struct kvm_ppc_mmuv3_cfg cfg = {
 388         .process_table = proc_tbl,
 389     };
 390
 391     if (radix) {
 392         flags |= KVM_PPC_MMUV3_RADIX;
 393     }
 394     if (gtse) {
 395         flags |= KVM_PPC_MMUV3_GTSE;
 396     }
 397     cfg.flags = flags;
 398     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 399     switch (ret) {
 400     case 0:
 401         return H_SUCCESS;
 402     case -EINVAL:
 403         return H_PARAMETER;
 404     case -ENODEV:
 405         return H_NOT_AVAILABLE;
 406     default:
 407         return H_HARDWARE;
 408     }
 409 }
 410
 411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 412 {
 413     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 414         return true;
 415     }
 416
 417     return (1ul << shift) <= rampgsize;
 418 }
 419
 420 static long max_cpu_page_size;
 421
 422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 423 {
 424     static struct kvm_ppc_smmu_info smmu_info;
 425     static bool has_smmu_info;
 426     CPUPPCState *env = &cpu->env;
 427     int iq, ik, jq, jk;
 428
 429     /* We only handle page sizes for 64-bit server guests for now */
 430     if (!(env->mmu_model & POWERPC_MMU_64)) {
 431         return;
 432     }
 433
 434     /* Collect MMU info from kernel if not already */
 435     if (!has_smmu_info) {
 436         kvm_get_smmu_info(cpu, &smmu_info);
 437         has_smmu_info = true;
 438     }
 439
 440     if (!max_cpu_page_size) {
 441         max_cpu_page_size = qemu_getrampagesize();
 442     }
 443
 444     /* Convert to QEMU form */
 445     memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps));
 446
 447     /* If we have HV KVM, we need to forbid CI large pages if our
 448      * host page size is smaller than 64K.
 449      */
 450     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 451         if (getpagesize() >= 0x10000) {
 452             cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE;
 453         } else {
 454             cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
 455         }
 456     }
 457
 458     /*
 459      * XXX This loop should be an entry wide AND of the capabilities that
 460      *     the selected CPU has with the capabilities that KVM supports.
 461      */
 462     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 463         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
 464         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 465
 466         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 467                                  ksps->page_shift)) {
 468             continue;
 469         }
 470         qsps->page_shift = ksps->page_shift;
 471         qsps->slb_enc = ksps->slb_enc;
 472         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 473             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 474                                      ksps->enc[jk].page_shift)) {
 475                 continue;
 476             }
 477             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 478             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 479             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 480                 break;
 481             }
 482         }
 483         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 484             break;
 485         }
 486     }
 487     cpu->hash64_opts->slb_size = smmu_info.slb_size;
 488     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 489         cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG;
 490     }
 491 }
 492
 493 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 494 {
 495     Object *mem_obj = object_resolve_path(obj_path, NULL);
 496     long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));
 497
 498     return pagesize >= max_cpu_page_size;
 499 }
 500
 501 #else /* defined (TARGET_PPC64) */
 502
 503 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 504 {
 505 }
 506
 507 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 508 {
 509     return true;
 510 }
 511
 512 #endif /* !defined (TARGET_PPC64) */
 513
 514 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 515 {
 516     return POWERPC_CPU(cpu)->vcpu_id;
 517 }
 518
 519 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 520  * book3s supports only 1 watchpoint, so array size
 521  * of 4 is sufficient for now.
 522  */
 523 #define MAX_HW_BKPTS 4
 524
 525 static struct HWBreakpoint {
 526     target_ulong addr;
 527     int type;
 528 } hw_debug_points[MAX_HW_BKPTS];
 529
 530 static CPUWatchpoint hw_watchpoint;
 531
 532 /* Default there is no breakpoint and watchpoint supported */
 533 static int max_hw_breakpoint;
 534 static int max_hw_watchpoint;
 535 static int nb_hw_breakpoint;
 536 static int nb_hw_watchpoint;
 537
 538 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 539 {
 540     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 541         max_hw_breakpoint = 2;
 542         max_hw_watchpoint = 2;
 543     }
 544
 545     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 546         fprintf(stderr, "Error initializing h/w breakpoints\n");
 547         return;
 548     }
 549 }
 550
 551 int kvm_arch_init_vcpu(CPUState *cs)
 552 {
 553     PowerPCCPU *cpu = POWERPC_CPU(cs);
 554     CPUPPCState *cenv = &cpu->env;
 555     int ret;
 556
 557     /* Gather server mmu info from KVM and update the CPU state */
 558     kvm_fixup_page_sizes(cpu);
 559
 560     /* Synchronize sregs with kvm */
 561     ret = kvm_arch_sync_sregs(cpu);
 562     if (ret) {
 563         if (ret == -EINVAL) {
 564             error_report("Register sync failed... If you're using kvm-hv.ko,"
 565                          " only \"-cpu host\" is possible");
 566         }
 567         return ret;
 568     }
 569
 570     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 571
 572     switch (cenv->mmu_model) {
 573     case POWERPC_MMU_BOOKE206:
 574         /* This target supports access to KVM's guest TLB */
 575         ret = kvm_booke206_tlb_init(cpu);
 576         break;
 577     case POWERPC_MMU_2_07:
 578         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 579             /* KVM-HV has transactional memory on POWER8 also without the
 580              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 581              * long as it's availble to userspace on the host. */
 582             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 583                 cap_htm = true;
 584             }
 585         }
 586         break;
 587     default:
 588         break;
 589     }
 590
 591     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 592     kvmppc_hw_debug_points_init(cenv);
 593
 594     return ret;
 595 }
 596
 597 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 598 {
 599     CPUPPCState *env = &cpu->env;
 600     CPUState *cs = CPU(cpu);
 601     struct kvm_dirty_tlb dirty_tlb;
 602     unsigned char *bitmap;
 603     int ret;
 604
 605     if (!env->kvm_sw_tlb) {
 606         return;
 607     }
 608
 609     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 610     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 611
 612     dirty_tlb.bitmap = (uintptr_t)bitmap;
 613     dirty_tlb.num_dirty = env->nb_tlb;
 614
 615     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 616     if (ret) {
 617         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 618                 __func__, strerror(-ret));
 619     }
 620
 621     g_free(bitmap);
 622 }
 623
 624 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 625 {
 626     PowerPCCPU *cpu = POWERPC_CPU(cs);
 627     CPUPPCState *env = &cpu->env;
 628     union {
 629         uint32_t u32;
 630         uint64_t u64;
 631     } val;
 632     struct kvm_one_reg reg = {
 633         .id = id,
 634         .addr = (uintptr_t) &val,
 635     };
 636     int ret;
 637
 638     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 639     if (ret != 0) {
 640         trace_kvm_failed_spr_get(spr, strerror(errno));
 641     } else {
 642         switch (id & KVM_REG_SIZE_MASK) {
 643         case KVM_REG_SIZE_U32:
 644             env->spr[spr] = val.u32;
 645             break;
 646
 647         case KVM_REG_SIZE_U64:
 648             env->spr[spr] = val.u64;
 649             break;
 650
 651         default:
 652             /* Don't handle this size yet */
 653             abort();
 654         }
 655     }
 656 }
 657
 658 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 659 {
 660     PowerPCCPU *cpu = POWERPC_CPU(cs);
 661     CPUPPCState *env = &cpu->env;
 662     union {
 663         uint32_t u32;
 664         uint64_t u64;
 665     } val;
 666     struct kvm_one_reg reg = {
 667         .id = id,
 668         .addr = (uintptr_t) &val,
 669     };
 670     int ret;
 671
 672     switch (id & KVM_REG_SIZE_MASK) {
 673     case KVM_REG_SIZE_U32:
 674         val.u32 = env->spr[spr];
 675         break;
 676
 677     case KVM_REG_SIZE_U64:
 678         val.u64 = env->spr[spr];
 679         break;
 680
 681     default:
 682         /* Don't handle this size yet */
 683         abort();
 684     }
 685
 686     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 687     if (ret != 0) {
 688         trace_kvm_failed_spr_set(spr, strerror(errno));
 689     }
 690 }
 691
 692 static int kvm_put_fp(CPUState *cs)
 693 {
 694     PowerPCCPU *cpu = POWERPC_CPU(cs);
 695     CPUPPCState *env = &cpu->env;
 696     struct kvm_one_reg reg;
 697     int i;
 698     int ret;
 699
 700     if (env->insns_flags & PPC_FLOAT) {
 701         uint64_t fpscr = env->fpscr;
 702         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 703
 704         reg.id = KVM_REG_PPC_FPSCR;
 705         reg.addr = (uintptr_t)&fpscr;
 706         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 707         if (ret < 0) {
 708             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 709             return ret;
 710         }
 711
 712         for (i = 0; i < 32; i++) {
 713             uint64_t vsr[2];
 714
 715 #ifdef HOST_WORDS_BIGENDIAN
 716             vsr[0] = float64_val(env->fpr[i]);
 717             vsr[1] = env->vsr[i];
 718 #else
 719             vsr[0] = env->vsr[i];
 720             vsr[1] = float64_val(env->fpr[i]);
 721 #endif
 722             reg.addr = (uintptr_t) &vsr;
 723             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 724
 725             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 726             if (ret < 0) {
 727                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 728                         i, strerror(errno));
 729                 return ret;
 730             }
 731         }
 732     }
 733
 734     if (env->insns_flags & PPC_ALTIVEC) {
 735         reg.id = KVM_REG_PPC_VSCR;
 736         reg.addr = (uintptr_t)&env->vscr;
 737         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 738         if (ret < 0) {
 739             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 740             return ret;
 741         }
 742
 743         for (i = 0; i < 32; i++) {
 744             reg.id = KVM_REG_PPC_VR(i);
 745             reg.addr = (uintptr_t)&env->avr[i];
 746             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 747             if (ret < 0) {
 748                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 749                 return ret;
 750             }
 751         }
 752     }
 753
 754     return 0;
 755 }
 756
 757 static int kvm_get_fp(CPUState *cs)
 758 {
 759     PowerPCCPU *cpu = POWERPC_CPU(cs);
 760     CPUPPCState *env = &cpu->env;
 761     struct kvm_one_reg reg;
 762     int i;
 763     int ret;
 764
 765     if (env->insns_flags & PPC_FLOAT) {
 766         uint64_t fpscr;
 767         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 768
 769         reg.id = KVM_REG_PPC_FPSCR;
 770         reg.addr = (uintptr_t)&fpscr;
 771         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 772         if (ret < 0) {
 773             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 774             return ret;
 775         } else {
 776             env->fpscr = fpscr;
 777         }
 778
 779         for (i = 0; i < 32; i++) {
 780             uint64_t vsr[2];
 781
 782             reg.addr = (uintptr_t) &vsr;
 783             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 784
 785             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 786             if (ret < 0) {
 787                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 788                         vsx ? "VSR" : "FPR", i, strerror(errno));
 789                 return ret;
 790             } else {
 791 #ifdef HOST_WORDS_BIGENDIAN
 792                 env->fpr[i] = vsr[0];
 793                 if (vsx) {
 794                     env->vsr[i] = vsr[1];
 795                 }
 796 #else
 797                 env->fpr[i] = vsr[1];
 798                 if (vsx) {
 799                     env->vsr[i] = vsr[0];
 800                 }
 801 #endif
 802             }
 803         }
 804     }
 805
 806     if (env->insns_flags & PPC_ALTIVEC) {
 807         reg.id = KVM_REG_PPC_VSCR;
 808         reg.addr = (uintptr_t)&env->vscr;
 809         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 810         if (ret < 0) {
 811             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 812             return ret;
 813         }
 814
 815         for (i = 0; i < 32; i++) {
 816             reg.id = KVM_REG_PPC_VR(i);
 817             reg.addr = (uintptr_t)&env->avr[i];
 818             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 819             if (ret < 0) {
 820                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 821                         i, strerror(errno));
 822                 return ret;
 823             }
 824         }
 825     }
 826
 827     return 0;
 828 }
 829
 830 #if defined(TARGET_PPC64)
 831 static int kvm_get_vpa(CPUState *cs)
 832 {
 833     PowerPCCPU *cpu = POWERPC_CPU(cs);
 834     CPUPPCState *env = &cpu->env;
 835     struct kvm_one_reg reg;
 836     int ret;
 837
 838     reg.id = KVM_REG_PPC_VPA_ADDR;
 839     reg.addr = (uintptr_t)&env->vpa_addr;
 840     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 841     if (ret < 0) {
 842         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 843         return ret;
 844     }
 845
 846     assert((uintptr_t)&env->slb_shadow_size
 847            == ((uintptr_t)&env->slb_shadow_addr + 8));
 848     reg.id = KVM_REG_PPC_VPA_SLB;
 849     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 850     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 851     if (ret < 0) {
 852         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 853                 strerror(errno));
 854         return ret;
 855     }
 856
 857     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 858     reg.id = KVM_REG_PPC_VPA_DTL;
 859     reg.addr = (uintptr_t)&env->dtl_addr;
 860     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 861     if (ret < 0) {
 862         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 863                 strerror(errno));
 864         return ret;
 865     }
 866
 867     return 0;
 868 }
 869
 870 static int kvm_put_vpa(CPUState *cs)
 871 {
 872     PowerPCCPU *cpu = POWERPC_CPU(cs);
 873     CPUPPCState *env = &cpu->env;
 874     struct kvm_one_reg reg;
 875     int ret;
 876
 877     /* SLB shadow or DTL can't be registered unless a master VPA is
 878      * registered.  That means when restoring state, if a VPA *is*
 879      * registered, we need to set that up first.  If not, we need to
 880      * deregister the others before deregistering the master VPA */
 881     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 882
 883     if (env->vpa_addr) {
 884         reg.id = KVM_REG_PPC_VPA_ADDR;
 885         reg.addr = (uintptr_t)&env->vpa_addr;
 886         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 887         if (ret < 0) {
 888             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 889             return ret;
 890         }
 891     }
 892
 893     assert((uintptr_t)&env->slb_shadow_size
 894            == ((uintptr_t)&env->slb_shadow_addr + 8));
 895     reg.id = KVM_REG_PPC_VPA_SLB;
 896     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 897     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 898     if (ret < 0) {
 899         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 900         return ret;
 901     }
 902
 903     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 904     reg.id = KVM_REG_PPC_VPA_DTL;
 905     reg.addr = (uintptr_t)&env->dtl_addr;
 906     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 907     if (ret < 0) {
 908         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 909                 strerror(errno));
 910         return ret;
 911     }
 912
 913     if (!env->vpa_addr) {
 914         reg.id = KVM_REG_PPC_VPA_ADDR;
 915         reg.addr = (uintptr_t)&env->vpa_addr;
 916         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 917         if (ret < 0) {
 918             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 919             return ret;
 920         }
 921     }
 922
 923     return 0;
 924 }
 925 #endif /* TARGET_PPC64 */
 926
 927 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 928 {
 929     CPUPPCState *env = &cpu->env;
 930     struct kvm_sregs sregs;
 931     int i;
 932
 933     sregs.pvr = env->spr[SPR_PVR];
 934
 935     if (cpu->vhyp) {
 936         PPCVirtualHypervisorClass *vhc =
 937             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 938         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 939     } else {
 940         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 941     }
 942
 943     /* Sync SLB */
 944 #ifdef TARGET_PPC64
 945     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 946         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 947         if (env->slb[i].esid & SLB_ESID_V) {
 948             sregs.u.s.ppc64.slb[i].slbe |= i;
 949         }
 950         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 951     }
 952 #endif
 953
 954     /* Sync SRs */
 955     for (i = 0; i < 16; i++) {
 956         sregs.u.s.ppc32.sr[i] = env->sr[i];
 957     }
 958
 959     /* Sync BATs */
 960     for (i = 0; i < 8; i++) {
 961         /* Beware. We have to swap upper and lower bits here */
 962         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 963             | env->DBAT[1][i];
 964         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 965             | env->IBAT[1][i];
 966     }
 967
 968     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 969 }
 970
 971 int kvm_arch_put_registers(CPUState *cs, int level)
 972 {
 973     PowerPCCPU *cpu = POWERPC_CPU(cs);
 974     CPUPPCState *env = &cpu->env;
 975     struct kvm_regs regs;
 976     int ret;
 977     int i;
 978
 979     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 980     if (ret < 0) {
 981         return ret;
 982     }
 983
 984     regs.ctr = env->ctr;
 985     regs.lr  = env->lr;
 986     regs.xer = cpu_read_xer(env);
 987     regs.msr = env->msr;
 988     regs.pc = env->nip;
 989
 990     regs.srr0 = env->spr[SPR_SRR0];
 991     regs.srr1 = env->spr[SPR_SRR1];
 992
 993     regs.sprg0 = env->spr[SPR_SPRG0];
 994     regs.sprg1 = env->spr[SPR_SPRG1];
 995     regs.sprg2 = env->spr[SPR_SPRG2];
 996     regs.sprg3 = env->spr[SPR_SPRG3];
 997     regs.sprg4 = env->spr[SPR_SPRG4];
 998     regs.sprg5 = env->spr[SPR_SPRG5];
 999     regs.sprg6 = env->spr[SPR_SPRG6];
1000     regs.sprg7 = env->spr[SPR_SPRG7];
1001
1002     regs.pid = env->spr[SPR_BOOKE_PID];
1003
1004     for (i = 0;i < 32; i++)
1005         regs.gpr[i] = env->gpr[i];
1006
1007     regs.cr = 0;
1008     for (i = 0; i < 8; i++) {
1009         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1010     }
1011
1012     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1013     if (ret < 0)
1014         return ret;
1015
1016     kvm_put_fp(cs);
1017
1018     if (env->tlb_dirty) {
1019         kvm_sw_tlb_put(cpu);
1020         env->tlb_dirty = false;
1021     }
1022
1023     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1024         ret = kvmppc_put_books_sregs(cpu);
1025         if (ret < 0) {
1026             return ret;
1027         }
1028     }
1029
1030     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1031         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1032     }
1033
1034     if (cap_one_reg) {
1035         int i;
1036
1037         /* We deliberately ignore errors here, for kernels which have
1038          * the ONE_REG calls, but don't support the specific
1039          * registers, there's a reasonable chance things will still
1040          * work, at least until we try to migrate. */
1041         for (i = 0; i < 1024; i++) {
1042             uint64_t id = env->spr_cb[i].one_reg_id;
1043
1044             if (id != 0) {
1045                 kvm_put_one_spr(cs, id, i);
1046             }
1047         }
1048
1049 #ifdef TARGET_PPC64
1050         if (msr_ts) {
1051             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1052                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1053             }
1054             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1055                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1056             }
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1065             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1067         }
1068
1069         if (cap_papr) {
1070             if (kvm_put_vpa(cs) < 0) {
1071                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1072             }
1073         }
1074
1075         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1076 #endif /* TARGET_PPC64 */
1077     }
1078
1079     return ret;
1080 }
1081
1082 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1083 {
1084      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1085 }
1086
1087 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1088 {
1089     CPUPPCState *env = &cpu->env;
1090     struct kvm_sregs sregs;
1091     int ret;
1092
1093     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1094     if (ret < 0) {
1095         return ret;
1096     }
1097
1098     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1099         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1100         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1101         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1102         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1103         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1104         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1105         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1106         env->spr[SPR_DECR] = sregs.u.e.dec;
1107         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1108         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1109         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1110     }
1111
1112     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1113         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1114         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1115         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1116         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1117         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1118     }
1119
1120     if (sregs.u.e.features & KVM_SREGS_E_64) {
1121         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1122     }
1123
1124     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1125         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1126     }
1127
1128     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1129         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1130         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1131         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1132         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1133         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1134         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1135         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1136         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1137         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1138         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1139         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1140         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1141         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1142         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1143         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1144         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1145         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1146         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1147         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1148         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1149         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1150         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1151         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1152         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1153         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1154         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1155         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1156         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1157         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1158         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1159         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1160         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1161
1162         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1163             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1164             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1165             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1166             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1167             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1168             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1169         }
1170
1171         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1172             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1173             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1174         }
1175
1176         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1177             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1178             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1179             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1180             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1181         }
1182     }
1183
1184     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1185         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1186         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1187         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1188         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1189         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1190         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1191         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1192         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1193         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1194         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1195     }
1196
1197     if (sregs.u.e.features & KVM_SREGS_EXP) {
1198         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1199     }
1200
1201     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1202         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1203         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1204     }
1205
1206     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1207         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1208         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1209         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1210
1211         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1212             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1213             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1214         }
1215     }
1216
1217     return 0;
1218 }
1219
1220 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1221 {
1222     CPUPPCState *env = &cpu->env;
1223     struct kvm_sregs sregs;
1224     int ret;
1225     int i;
1226
1227     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1228     if (ret < 0) {
1229         return ret;
1230     }
1231
1232     if (!cpu->vhyp) {
1233         ppc_store_sdr1(env, sregs.u.s.sdr1);
1234     }
1235
1236     /* Sync SLB */
1237 #ifdef TARGET_PPC64
1238     /*
1239      * The packed SLB array we get from KVM_GET_SREGS only contains
1240      * information about valid entries. So we flush our internal copy
1241      * to get rid of stale ones, then put all valid SLB entries back
1242      * in.
1243      */
1244     memset(env->slb, 0, sizeof(env->slb));
1245     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1246         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1247         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1248         /*
1249          * Only restore valid entries
1250          */
1251         if (rb & SLB_ESID_V) {
1252             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1253         }
1254     }
1255 #endif
1256
1257     /* Sync SRs */
1258     for (i = 0; i < 16; i++) {
1259         env->sr[i] = sregs.u.s.ppc32.sr[i];
1260     }
1261
1262     /* Sync BATs */
1263     for (i = 0; i < 8; i++) {
1264         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1265         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1266         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1267         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1268     }
1269
1270     return 0;
1271 }
1272
1273 int kvm_arch_get_registers(CPUState *cs)
1274 {
1275     PowerPCCPU *cpu = POWERPC_CPU(cs);
1276     CPUPPCState *env = &cpu->env;
1277     struct kvm_regs regs;
1278     uint32_t cr;
1279     int i, ret;
1280
1281     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1282     if (ret < 0)
1283         return ret;
1284
1285     cr = regs.cr;
1286     for (i = 7; i >= 0; i--) {
1287         env->crf[i] = cr & 15;
1288         cr >>= 4;
1289     }
1290
1291     env->ctr = regs.ctr;
1292     env->lr = regs.lr;
1293     cpu_write_xer(env, regs.xer);
1294     env->msr = regs.msr;
1295     env->nip = regs.pc;
1296
1297     env->spr[SPR_SRR0] = regs.srr0;
1298     env->spr[SPR_SRR1] = regs.srr1;
1299
1300     env->spr[SPR_SPRG0] = regs.sprg0;
1301     env->spr[SPR_SPRG1] = regs.sprg1;
1302     env->spr[SPR_SPRG2] = regs.sprg2;
1303     env->spr[SPR_SPRG3] = regs.sprg3;
1304     env->spr[SPR_SPRG4] = regs.sprg4;
1305     env->spr[SPR_SPRG5] = regs.sprg5;
1306     env->spr[SPR_SPRG6] = regs.sprg6;
1307     env->spr[SPR_SPRG7] = regs.sprg7;
1308
1309     env->spr[SPR_BOOKE_PID] = regs.pid;
1310
1311     for (i = 0;i < 32; i++)
1312         env->gpr[i] = regs.gpr[i];
1313
1314     kvm_get_fp(cs);
1315
1316     if (cap_booke_sregs) {
1317         ret = kvmppc_get_booke_sregs(cpu);
1318         if (ret < 0) {
1319             return ret;
1320         }
1321     }
1322
1323     if (cap_segstate) {
1324         ret = kvmppc_get_books_sregs(cpu);
1325         if (ret < 0) {
1326             return ret;
1327         }
1328     }
1329
1330     if (cap_hior) {
1331         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1332     }
1333
1334     if (cap_one_reg) {
1335         int i;
1336
1337         /* We deliberately ignore errors here, for kernels which have
1338          * the ONE_REG calls, but don't support the specific
1339          * registers, there's a reasonable chance things will still
1340          * work, at least until we try to migrate. */
1341         for (i = 0; i < 1024; i++) {
1342             uint64_t id = env->spr_cb[i].one_reg_id;
1343
1344             if (id != 0) {
1345                 kvm_get_one_spr(cs, id, i);
1346             }
1347         }
1348
1349 #ifdef TARGET_PPC64
1350         if (msr_ts) {
1351             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1352                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1353             }
1354             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1355                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1356             }
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1365             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1367         }
1368
1369         if (cap_papr) {
1370             if (kvm_get_vpa(cs) < 0) {
1371                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1372             }
1373         }
1374
1375         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1376 #endif
1377     }
1378
1379     return 0;
1380 }
1381
1382 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1383 {
1384     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1385
1386     if (irq != PPC_INTERRUPT_EXT) {
1387         return 0;
1388     }
1389
1390     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1391         return 0;
1392     }
1393
1394     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1395
1396     return 0;
1397 }
1398
/*
 * PPC_INPUT_INT names the core's interrupt input pin for the target
 * being built; kvm_arch_pre_run() uses it as a bit number in
 * env->irq_input_state.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1406
1407 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1408 {
1409     PowerPCCPU *cpu = POWERPC_CPU(cs);
1410     CPUPPCState *env = &cpu->env;
1411     int r;
1412     unsigned irq;
1413
1414     qemu_mutex_lock_iothread();
1415
1416     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1417      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1418     if (!cap_interrupt_level &&
1419         run->ready_for_interrupt_injection &&
1420         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1421         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1422     {
1423         /* For now KVM disregards the 'irq' argument. However, in the
1424          * future KVM could cache it in-kernel to avoid a heavyweight exit
1425          * when reading the UIC.
1426          */
1427         irq = KVM_INTERRUPT_SET;
1428
1429         DPRINTF("injected interrupt %d\n", irq);
1430         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1431         if (r < 0) {
1432             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1433         }
1434
1435         /* Always wake up soon in case the interrupt was level based */
1436         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1437                        (NANOSECONDS_PER_SECOND / 50));
1438     }
1439
1440     /* We don't know if there are more interrupts pending after this. However,
1441      * the guest will return to userspace in the course of handling this one
1442      * anyways, so we will get a chance to deliver the rest. */
1443
1444     qemu_mutex_unlock_iothread();
1445 }
1446
1447 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1448 {
1449     return MEMTXATTRS_UNSPECIFIED;
1450 }
1451
1452 int kvm_arch_process_async_events(CPUState *cs)
1453 {
1454     return cs->halted;
1455 }
1456
1457 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1458 {
1459     CPUState *cs = CPU(cpu);
1460     CPUPPCState *env = &cpu->env;
1461
1462     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1463         cs->halted = 1;
1464         cs->exception_index = EXCP_HLT;
1465     }
1466
1467     return 0;
1468 }
1469
1470 /* map dcr access to existing qemu dcr emulation */
1471 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1472 {
1473     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1474         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1475
1476     return 0;
1477 }
1478
1479 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1480 {
1481     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1482         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1483
1484     return 0;
1485 }
1486
1487 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1488 {
1489     /* Mixed endian case is not handled */
1490     uint32_t sc = debug_inst_opcode;
1491
1492     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1493                             sizeof(sc), 0) ||
1494         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1495         return -EINVAL;
1496     }
1497
1498     return 0;
1499 }
1500
1501 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1502 {
1503     uint32_t sc;
1504
1505     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1506         sc != debug_inst_opcode ||
1507         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1508                             sizeof(sc), 1)) {
1509         return -EINVAL;
1510     }
1511
1512     return 0;
1513 }
1514
1515 static int find_hw_breakpoint(target_ulong addr, int type)
1516 {
1517     int n;
1518
1519     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1520            <= ARRAY_SIZE(hw_debug_points));
1521
1522     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1523         if (hw_debug_points[n].addr == addr &&
1524              hw_debug_points[n].type == type) {
1525             return n;
1526         }
1527     }
1528
1529     return -1;
1530 }
1531
1532 static int find_hw_watchpoint(target_ulong addr, int *flag)
1533 {
1534     int n;
1535
1536     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1537     if (n >= 0) {
1538         *flag = BP_MEM_ACCESS;
1539         return n;
1540     }
1541
1542     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1543     if (n >= 0) {
1544         *flag = BP_MEM_WRITE;
1545         return n;
1546     }
1547
1548     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1549     if (n >= 0) {
1550         *flag = BP_MEM_READ;
1551         return n;
1552     }
1553
1554     return -1;
1555 }
1556
1557 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1558                                   target_ulong len, int type)
1559 {
1560     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1561         return -ENOBUFS;
1562     }
1563
1564     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1565     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1566
1567     switch (type) {
1568     case GDB_BREAKPOINT_HW:
1569         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1570             return -ENOBUFS;
1571         }
1572
1573         if (find_hw_breakpoint(addr, type) >= 0) {
1574             return -EEXIST;
1575         }
1576
1577         nb_hw_breakpoint++;
1578         break;
1579
1580     case GDB_WATCHPOINT_WRITE:
1581     case GDB_WATCHPOINT_READ:
1582     case GDB_WATCHPOINT_ACCESS:
1583         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1584             return -ENOBUFS;
1585         }
1586
1587         if (find_hw_breakpoint(addr, type) >= 0) {
1588             return -EEXIST;
1589         }
1590
1591         nb_hw_watchpoint++;
1592         break;
1593
1594     default:
1595         return -ENOSYS;
1596     }
1597
1598     return 0;
1599 }
1600
1601 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1602                                   target_ulong len, int type)
1603 {
1604     int n;
1605
1606     n = find_hw_breakpoint(addr, type);
1607     if (n < 0) {
1608         return -ENOENT;
1609     }
1610
1611     switch (type) {
1612     case GDB_BREAKPOINT_HW:
1613         nb_hw_breakpoint--;
1614         break;
1615
1616     case GDB_WATCHPOINT_WRITE:
1617     case GDB_WATCHPOINT_READ:
1618     case GDB_WATCHPOINT_ACCESS:
1619         nb_hw_watchpoint--;
1620         break;
1621
1622     default:
1623         return -ENOSYS;
1624     }
1625     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1626
1627     return 0;
1628 }
1629
1630 void kvm_arch_remove_all_hw_breakpoints(void)
1631 {
1632     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1633 }
1634
1635 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1636 {
1637     int n;
1638
1639     /* Software Breakpoint updates */
1640     if (kvm_sw_breakpoints_active(cs)) {
1641         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1642     }
1643
1644     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1645            <= ARRAY_SIZE(hw_debug_points));
1646     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1647
1648     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1649         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1650         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1651         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1652             switch (hw_debug_points[n].type) {
1653             case GDB_BREAKPOINT_HW:
1654                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1655                 break;
1656             case GDB_WATCHPOINT_WRITE:
1657                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1658                 break;
1659             case GDB_WATCHPOINT_READ:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1661                 break;
1662             case GDB_WATCHPOINT_ACCESS:
1663                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1664                                         KVMPPC_DEBUG_WATCH_READ;
1665                 break;
1666             default:
1667                 cpu_abort(cs, "Unsupported breakpoint type\n");
1668             }
1669             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1670         }
1671     }
1672 }
1673
1674 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1675 {
1676     CPUState *cs = CPU(cpu);
1677     CPUPPCState *env = &cpu->env;
1678     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1679     int handle = 0;
1680     int n;
1681     int flag = 0;
1682
1683     if (cs->singlestep_enabled) {
1684         handle = 1;
1685     } else if (arch_info->status) {
1686         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1687             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1688                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1689                 if (n >= 0) {
1690                     handle = 1;
1691                 }
1692             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1693                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1694                 n = find_hw_watchpoint(arch_info->address,  &flag);
1695                 if (n >= 0) {
1696                     handle = 1;
1697                     cs->watchpoint_hit = &hw_watchpoint;
1698                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1699                     hw_watchpoint.flags = flag;
1700                 }
1701             }
1702         }
1703     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1704         handle = 1;
1705     } else {
1706         /* QEMU is not able to handle debug exception, so inject
1707          * program exception to guest;
1708          * Yes program exception NOT debug exception !!
1709          * When QEMU is using debug resources then debug exception must
1710          * be always set. To achieve this we set MSR_DE and also set
1711          * MSRP_DEP so guest cannot change MSR_DE.
1712          * When emulating debug resource for guest we want guest
1713          * to control MSR_DE (enable/disable debug interrupt on need).
1714          * Supporting both configurations are NOT possible.
1715          * So the result is that we cannot share debug resources
1716          * between QEMU and Guest on BOOKE architecture.
1717          * In the current design QEMU gets the priority over guest,
1718          * this means that if QEMU is using debug resources then guest
1719          * cannot use them;
1720          * For software breakpoint QEMU uses a privileged instruction;
1721          * So there cannot be any reason that we are here for guest
1722          * set debug exception, only possibility is guest executed a
1723          * privileged / illegal instruction and that's why we are
1724          * injecting a program interrupt.
1725          */
1726
1727         cpu_synchronize_state(cs);
1728         /* env->nip is PC, so increment this by 4 to use
1729          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1730          */
1731         env->nip += 4;
1732         cs->exception_index = POWERPC_EXCP_PROGRAM;
1733         env->error_code = POWERPC_EXCP_INVAL;
1734         ppc_cpu_do_interrupt(cs);
1735     }
1736
1737     return handle;
1738 }
1739
1740 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1741 {
1742     PowerPCCPU *cpu = POWERPC_CPU(cs);
1743     CPUPPCState *env = &cpu->env;
1744     int ret;
1745
1746     qemu_mutex_lock_iothread();
1747
1748     switch (run->exit_reason) {
1749     case KVM_EXIT_DCR:
1750         if (run->dcr.is_write) {
1751             DPRINTF("handle dcr write\n");
1752             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1753         } else {
1754             DPRINTF("handle dcr read\n");
1755             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1756         }
1757         break;
1758     case KVM_EXIT_HLT:
1759         DPRINTF("handle halt\n");
1760         ret = kvmppc_handle_halt(cpu);
1761         break;
1762 #if defined(TARGET_PPC64)
1763     case KVM_EXIT_PAPR_HCALL:
1764         DPRINTF("handle PAPR hypercall\n");
1765         run->papr_hcall.ret = spapr_hypercall(cpu,
1766                                               run->papr_hcall.nr,
1767                                               run->papr_hcall.args);
1768         ret = 0;
1769         break;
1770 #endif
1771     case KVM_EXIT_EPR:
1772         DPRINTF("handle epr\n");
1773         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1774         ret = 0;
1775         break;
1776     case KVM_EXIT_WATCHDOG:
1777         DPRINTF("handle watchdog expiry\n");
1778         watchdog_perform_action();
1779         ret = 0;
1780         break;
1781
1782     case KVM_EXIT_DEBUG:
1783         DPRINTF("handle debug exception\n");
1784         if (kvm_handle_debug(cpu, run)) {
1785             ret = EXCP_DEBUG;
1786             break;
1787         }
1788         /* re-enter, this exception was guest-internal */
1789         ret = 0;
1790         break;
1791
1792     default:
1793         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1794         ret = -1;
1795         break;
1796     }
1797
1798     qemu_mutex_unlock_iothread();
1799     return ret;
1800 }
1801
1802 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1803 {
1804     CPUState *cs = CPU(cpu);
1805     uint32_t bits = tsr_bits;
1806     struct kvm_one_reg reg = {
1807         .id = KVM_REG_PPC_OR_TSR,
1808         .addr = (uintptr_t) &bits,
1809     };
1810
1811     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1812 }
1813
1814 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1815 {
1816
1817     CPUState *cs = CPU(cpu);
1818     uint32_t bits = tsr_bits;
1819     struct kvm_one_reg reg = {
1820         .id = KVM_REG_PPC_CLEAR_TSR,
1821         .addr = (uintptr_t) &bits,
1822     };
1823
1824     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1825 }
1826
1827 int kvmppc_set_tcr(PowerPCCPU *cpu)
1828 {
1829     CPUState *cs = CPU(cpu);
1830     CPUPPCState *env = &cpu->env;
1831     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1832
1833     struct kvm_one_reg reg = {
1834         .id = KVM_REG_PPC_TCR,
1835         .addr = (uintptr_t) &tcr,
1836     };
1837
1838     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1839 }
1840
1841 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1842 {
1843     CPUState *cs = CPU(cpu);
1844     int ret;
1845
1846     if (!kvm_enabled()) {
1847         return -1;
1848     }
1849
1850     if (!cap_ppc_watchdog) {
1851         printf("warning: KVM does not support watchdog");
1852         return -1;
1853     }
1854
1855     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1856     if (ret < 0) {
1857         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1858                 __func__, strerror(-ret));
1859         return ret;
1860     }
1861
1862     return ret;
1863 }
1864
/*
 * Scan /proc/cpuinfo for the first line that starts with @field and
 * copy that whole line into @value (at most @len bytes, via pstrcpy).
 *
 * Returns 0 if a matching line was found, -1 if the file could not be
 * opened or no line matched.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int found = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    for (;;) {
        if (fgets(line, sizeof(line), cpuinfo) == NULL) {
            break;
        }
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* Stop on a line whose first byte is NUL */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(cpuinfo);

    return found;
}
1892
1893 uint32_t kvmppc_get_tbfreq(void)
1894 {
1895     char line[512];
1896     char *ns;
1897     uint32_t retval = NANOSECONDS_PER_SECOND;
1898
1899     if (read_cpuinfo("timebase", line, sizeof(line))) {
1900         return retval;
1901     }
1902
1903     if (!(ns = strchr(line, ':'))) {
1904         return retval;
1905     }
1906
1907     ns++;
1908
1909     return atoi(ns);
1910 }
1911
1912 bool kvmppc_get_host_serial(char **value)
1913 {
1914     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1915                                NULL);
1916 }
1917
1918 bool kvmppc_get_host_model(char **value)
1919 {
1920     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1921 }
1922
1923 /* Try to find a device tree node for a CPU with clock-frequency property */
1924 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1925 {
1926     struct dirent *dirp;
1927     DIR *dp;
1928
1929     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1930         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1931         return -1;
1932     }
1933
1934     buf[0] = '\0';
1935     while ((dirp = readdir(dp)) != NULL) {
1936         FILE *f;
1937         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1938                  dirp->d_name);
1939         f = fopen(buf, "r");
1940         if (f) {
1941             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1942             fclose(f);
1943             break;
1944         }
1945         buf[0] = '\0';
1946     }
1947     closedir(dp);
1948     if (buf[0] == '\0') {
1949         printf("Unknown host!\n");
1950         return -1;
1951     }
1952
1953     return 0;
1954 }
1955
/*
 * Read a big-endian integer from @filename.
 *
 * Returns the decoded value when exactly 4 or 8 bytes were read,
 * (uint64_t)-1 if the file cannot be opened, and 0 for any other
 * length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *fp = fopen(filename, "rb");

    if (fp == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1982
1983 /* Read a CPU node property from the host device tree that's a single
1984  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1985  * (can't find or open the property, or doesn't understand the
1986  * format) */
1987 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1988 {
1989     char buf[PATH_MAX], *tmp;
1990     uint64_t val;
1991
1992     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1993         return -1;
1994     }
1995
1996     tmp = g_strdup_printf("%s/%s", buf, propname);
1997     val = kvmppc_read_int_dt(tmp);
1998     g_free(tmp);
1999
2000     return val;
2001 }
2002
/*
 * Return the "clock-frequency" property of the host CPU device tree
 * node, as read by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
2007
2008 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2009  {
2010      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2011      CPUState *cs = CPU(cpu);
2012
2013     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2014         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2015         return 0;
2016     }
2017
2018     return 1;
2019 }
2020
2021 int kvmppc_get_hasidle(CPUPPCState *env)
2022 {
2023     struct kvm_ppc_pvinfo pvinfo;
2024
2025     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2026         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2027         return 1;
2028     }
2029
2030     return 0;
2031 }
2032
2033 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2034 {
2035     uint32_t *hc = (uint32_t*)buf;
2036     struct kvm_ppc_pvinfo pvinfo;
2037
2038     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2039         memcpy(buf, pvinfo.hcall, buf_len);
2040         return 0;
2041     }
2042
2043     /*
2044      * Fallback to always fail hypercalls regardless of endianness:
2045      *
2046      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2047      *     li r3, -1
2048      *     b .+8       (becomes nop in wrong endian)
2049      *     bswap32(li r3, -1)
2050      */
2051
2052     hc[0] = cpu_to_be32(0x08000048);
2053     hc[1] = cpu_to_be32(0x3860ffff);
2054     hc[2] = cpu_to_be32(0x48000008);
2055     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2056
2057     return 1;
2058 }
2059
2060 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2061 {
2062     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2063 }
2064
2065 void kvmppc_enable_logical_ci_hcalls(void)
2066 {
2067     /*
2068      * FIXME: it would be nice if we could detect the cases where
2069      * we're using a device which requires the in kernel
2070      * implementation of these hcalls, but the kernel lacks them and
2071      * produce a warning.
2072      */
2073     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2074     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2075 }
2076
2077 void kvmppc_enable_set_mode_hcall(void)
2078 {
2079     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2080 }
2081
2082 void kvmppc_enable_clear_ref_mod_hcalls(void)
2083 {
2084     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2085     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2086 }
2087
2088 void kvmppc_set_papr(PowerPCCPU *cpu)
2089 {
2090     CPUState *cs = CPU(cpu);
2091     int ret;
2092
2093     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2094     if (ret) {
2095         error_report("This vCPU type or KVM version does not support PAPR");
2096         exit(1);
2097     }
2098
2099     /* Update the capability flag so we sync the right information
2100      * with kvm */
2101     cap_papr = 1;
2102 }
2103
2104 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2105 {
2106     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2107 }
2108
2109 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2110 {
2111     CPUState *cs = CPU(cpu);
2112     int ret;
2113
2114     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2115     if (ret && mpic_proxy) {
2116         error_report("This KVM version does not support EPR");
2117         exit(1);
2118     }
2119 }
2120
2121 int kvmppc_smt_threads(void)
2122 {
2123     return cap_ppc_smt ? cap_ppc_smt : 1;
2124 }
2125
2126 int kvmppc_set_smt_threads(int smt)
2127 {
2128     int ret;
2129
2130     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2131     if (!ret) {
2132         cap_ppc_smt = smt;
2133     }
2134     return ret;
2135 }
2136
2137 void kvmppc_hint_smt_possible(Error **errp)
2138 {
2139     int i;
2140     GString *g;
2141     char *s;
2142
2143     assert(kvm_enabled());
2144     if (cap_ppc_smt_possible) {
2145         g = g_string_new("Available VSMT modes:");
2146         for (i = 63; i >= 0; i--) {
2147             if ((1UL << i) & cap_ppc_smt_possible) {
2148                 g_string_append_printf(g, " %lu", (1UL << i));
2149             }
2150         }
2151         s = g_string_free(g, false);
2152         error_append_hint(errp, "%s.\n", s);
2153         g_free(s);
2154     } else {
2155         error_append_hint(errp,
2156                           "This KVM seems to be too old to support VSMT.\n");
2157     }
2158 }
2159
2160
2161 #ifdef TARGET_PPC64
2162 off_t kvmppc_alloc_rma(void **rma)
2163 {
2164     off_t size;
2165     int fd;
2166     struct kvm_allocate_rma ret;
2167
2168     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2169      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2170      *                      not necessary on this hardware
2171      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2172      *
2173      * FIXME: We should allow the user to force contiguous RMA
2174      * allocation in the cap_ppc_rma==1 case.
2175      */
2176     if (cap_ppc_rma < 2) {
2177         return 0;
2178     }
2179
2180     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2181     if (fd < 0) {
2182         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2183                 strerror(errno));
2184         return -1;
2185     }
2186
2187     size = MIN(ret.rma_size, 256ul << 20);
2188
2189     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2190     if (*rma == MAP_FAILED) {
2191         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2192         return -1;
2193     };
2194
2195     return size;
2196 }
2197
2198 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2199 {
2200     struct kvm_ppc_smmu_info info;
2201     long rampagesize, best_page_shift;
2202     int i;
2203
2204     if (cap_ppc_rma >= 2) {
2205         return current_size;
2206     }
2207
2208     /* Find the largest hardware supported page size that's less than
2209      * or equal to the (logical) backing page size of guest RAM */
2210     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2211     rampagesize = qemu_getrampagesize();
2212     best_page_shift = 0;
2213
2214     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2215         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2216
2217         if (!sps->page_shift) {
2218             continue;
2219         }
2220
2221         if ((sps->page_shift > best_page_shift)
2222             && ((1UL << sps->page_shift) <= rampagesize)) {
2223             best_page_shift = sps->page_shift;
2224         }
2225     }
2226
2227     return MIN(current_size,
2228                1ULL << (best_page_shift + hash_shift - 7));
2229 }
2230 #endif
2231
2232 bool kvmppc_spapr_use_multitce(void)
2233 {
2234     return cap_spapr_multitce;
2235 }
2236
2237 int kvmppc_spapr_enable_inkernel_multitce(void)
2238 {
2239     int ret;
2240
2241     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2242                             H_PUT_TCE_INDIRECT, 1);
2243     if (!ret) {
2244         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2245                                 H_STUFF_TCE, 1);
2246     }
2247
2248     return ret;
2249 }
2250
2251 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2252                               uint64_t bus_offset, uint32_t nb_table,
2253                               int *pfd, bool need_vfio)
2254 {
2255     long len;
2256     int fd;
2257     void *table;
2258
2259     /* Must set fd to -1 so we don't try to munmap when called for
2260      * destroying the table, which the upper layers -will- do
2261      */
2262     *pfd = -1;
2263     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2264         return NULL;
2265     }
2266
2267     if (cap_spapr_tce_64) {
2268         struct kvm_create_spapr_tce_64 args = {
2269             .liobn = liobn,
2270             .page_shift = page_shift,
2271             .offset = bus_offset >> page_shift,
2272             .size = nb_table,
2273             .flags = 0
2274         };
2275         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2276         if (fd < 0) {
2277             fprintf(stderr,
2278                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2279                     liobn);
2280             return NULL;
2281         }
2282     } else if (cap_spapr_tce) {
2283         uint64_t window_size = (uint64_t) nb_table << page_shift;
2284         struct kvm_create_spapr_tce args = {
2285             .liobn = liobn,
2286             .window_size = window_size,
2287         };
2288         if ((window_size != args.window_size) || bus_offset) {
2289             return NULL;
2290         }
2291         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2292         if (fd < 0) {
2293             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2294                     liobn);
2295             return NULL;
2296         }
2297     } else {
2298         return NULL;
2299     }
2300
2301     len = nb_table * sizeof(uint64_t);
2302     /* FIXME: round this up to page size */
2303
2304     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2305     if (table == MAP_FAILED) {
2306         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2307                 liobn);
2308         close(fd);
2309         return NULL;
2310     }
2311
2312     *pfd = fd;
2313     return table;
2314 }
2315
/*
 * Unmap a TCE table of @nb_table entries and close its @fd.
 *
 * Returns -1 when @fd is negative (nothing to tear down), 0 otherwise.
 * munmap()/close() failures are reported on stderr but not propagated.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    /* Always close the fd, even if the unmap failed, so the descriptor
     * itself is not leaked as well */
    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2334
2335 int kvmppc_reset_htab(int shift_hint)
2336 {
2337     uint32_t shift = shift_hint;
2338
2339     if (!kvm_enabled()) {
2340         /* Full emulation, tell caller to allocate htab itself */
2341         return 0;
2342     }
2343     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2344         int ret;
2345         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2346         if (ret == -ENOTTY) {
2347             /* At least some versions of PR KVM advertise the
2348              * capability, but don't implement the ioctl().  Oops.
2349              * Return 0 so that we allocate the htab in qemu, as is
2350              * correct for PR. */
2351             return 0;
2352         } else if (ret < 0) {
2353             return ret;
2354         }
2355         return shift;
2356     }
2357
2358     /* We have a kernel that predates the htab reset calls.  For PR
2359      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2360      * this era, it has allocated a 16MB fixed size hash table already. */
2361     if (kvmppc_is_pr(kvm_state)) {
2362         /* PR - tell caller to allocate htab */
2363         return 0;
2364     } else {
2365         /* HV - assume 16MB kernel allocated htab */
2366         return 24;
2367     }
2368 }
2369
2370 static inline uint32_t mfpvr(void)
2371 {
2372     uint32_t pvr;
2373
2374     asm ("mfpvr %0"
2375          : "=r"(pvr));
2376     return pvr;
2377 }
2378
/* Set (@on true) or clear (@on false) the @flags bits in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2387
2388 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2389 {
2390     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2391     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2392     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2393
2394     /* Now fix up the class with information we can query from the host */
2395     pcc->pvr = mfpvr();
2396
2397     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2398                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2399     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2400                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2401     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2402                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2403
2404     if (dcache_size != -1) {
2405         pcc->l1_dcache_size = dcache_size;
2406     }
2407
2408     if (icache_size != -1) {
2409         pcc->l1_icache_size = icache_size;
2410     }
2411
2412 #if defined(TARGET_PPC64)
2413     pcc->radix_page_info = kvm_get_radix_page_info();
2414
2415     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2416         /*
2417          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2418          * compliant.  More importantly, advertising ISA 3.00
2419          * architected mode may prevent guests from activating
2420          * necessary DD1 workarounds.
2421          */
2422         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2423                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2424     }
2425 #endif /* defined(TARGET_PPC64) */
2426 }
2427
2428 bool kvmppc_has_cap_epr(void)
2429 {
2430     return cap_epr;
2431 }
2432
2433 bool kvmppc_has_cap_fixup_hcalls(void)
2434 {
2435     return cap_fixup_hcalls;
2436 }
2437
2438 bool kvmppc_has_cap_htm(void)
2439 {
2440     return cap_htm;
2441 }
2442
2443 bool kvmppc_has_cap_mmu_radix(void)
2444 {
2445     return cap_mmu_radix;
2446 }
2447
2448 bool kvmppc_has_cap_mmu_hash_v3(void)
2449 {
2450     return cap_mmu_hash_v3;
2451 }
2452
2453 static void kvmppc_get_cpu_characteristics(KVMState *s)
2454 {
2455     struct kvm_ppc_cpu_char c;
2456     int ret;
2457
2458     /* Assume broken */
2459     cap_ppc_safe_cache = 0;
2460     cap_ppc_safe_bounds_check = 0;
2461     cap_ppc_safe_indirect_branch = 0;
2462
2463     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2464     if (!ret) {
2465         return;
2466     }
2467     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2468     if (ret < 0) {
2469         return;
2470     }
2471     /* Parse and set cap_ppc_safe_cache */
2472     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2473         cap_ppc_safe_cache = 2;
2474     } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2475                (c.character & c.character_mask
2476                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2477         cap_ppc_safe_cache = 1;
2478     }
2479     /* Parse and set cap_ppc_safe_bounds_check */
2480     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2481         cap_ppc_safe_bounds_check = 2;
2482     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2483         cap_ppc_safe_bounds_check = 1;
2484     }
2485     /* Parse and set cap_ppc_safe_indirect_branch */
2486     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2487         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD;
2488     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2489         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS;
2490     }
2491 }
2492
2493 int kvmppc_get_cap_safe_cache(void)
2494 {
2495     return cap_ppc_safe_cache;
2496 }
2497
2498 int kvmppc_get_cap_safe_bounds_check(void)
2499 {
2500     return cap_ppc_safe_bounds_check;
2501 }
2502
2503 int kvmppc_get_cap_safe_indirect_branch(void)
2504 {
2505     return cap_ppc_safe_indirect_branch;
2506 }
2507
2508 bool kvmppc_has_cap_spapr_vfio(void)
2509 {
2510     return cap_spapr_vfio;
2511 }
2512
2513 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2514 {
2515     uint32_t host_pvr = mfpvr();
2516     PowerPCCPUClass *pvr_pcc;
2517
2518     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2519     if (pvr_pcc == NULL) {
2520         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2521     }
2522
2523     return pvr_pcc;
2524 }
2525
2526 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2527 {
2528     TypeInfo type_info = {
2529         .name = TYPE_HOST_POWERPC_CPU,
2530         .class_init = kvmppc_host_cpu_class_init,
2531     };
2532     MachineClass *mc = MACHINE_GET_CLASS(ms);
2533     PowerPCCPUClass *pvr_pcc;
2534     ObjectClass *oc;
2535     DeviceClass *dc;
2536     int i;
2537
2538     pvr_pcc = kvm_ppc_get_host_cpu_class();
2539     if (pvr_pcc == NULL) {
2540         return -1;
2541     }
2542     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2543     type_register(&type_info);
2544     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2545         /* override TCG default cpu type with 'host' cpu model */
2546         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2547     }
2548
2549     oc = object_class_by_name(type_info.name);
2550     g_assert(oc);
2551
2552     /*
2553      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2554      * we want "POWER8" to be a "family" alias that points to the current
2555      * host CPU type, too)
2556      */
2557     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2558     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2559         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2560             char *suffix;
2561
2562             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2563             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2564             if (suffix) {
2565                 *suffix = 0;
2566             }
2567             break;
2568         }
2569     }
2570
2571     return 0;
2572 }
2573
2574 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2575 {
2576     struct kvm_rtas_token_args args = {
2577         .token = token,
2578     };
2579
2580     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2581         return -ENOENT;
2582     }
2583
2584     strncpy(args.name, function, sizeof(args.name));
2585
2586     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2587 }
2588
2589 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2590 {
2591     struct kvm_get_htab_fd s = {
2592         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2593         .start_index = index,
2594     };
2595     int ret;
2596
2597     if (!cap_htab_fd) {
2598         error_setg(errp, "KVM version doesn't support %s the HPT",
2599                    write ? "writing" : "reading");
2600         return -ENOTSUP;
2601     }
2602
2603     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2604     if (ret < 0) {
2605         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2606                    write ? "writing" : "reading", write ? "to" : "from",
2607                    strerror(errno));
2608         return -errno;
2609     }
2610
2611     return ret;
2612 }
2613
2614 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2615 {
2616     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2617     uint8_t buf[bufsize];
2618     ssize_t rc;
2619
2620     do {
2621         rc = read(fd, buf, bufsize);
2622         if (rc < 0) {
2623             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2624                     strerror(errno));
2625             return rc;
2626         } else if (rc) {
2627             uint8_t *buffer = buf;
2628             ssize_t n = rc;
2629             while (n) {
2630                 struct kvm_get_htab_header *head =
2631                     (struct kvm_get_htab_header *) buffer;
2632                 size_t chunksize = sizeof(*head) +
2633                      HASH_PTE_SIZE_64 * head->n_valid;
2634
2635                 qemu_put_be32(f, head->index);
2636                 qemu_put_be16(f, head->n_valid);
2637                 qemu_put_be16(f, head->n_invalid);
2638                 qemu_put_buffer(f, (void *)(head + 1),
2639                                 HASH_PTE_SIZE_64 * head->n_valid);
2640
2641                 buffer += chunksize;
2642                 n -= chunksize;
2643             }
2644         }
2645     } while ((rc != 0)
2646              && ((max_ns < 0)
2647                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2648
2649     return (rc == 0) ? 1 : 0;
2650 }
2651
2652 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2653                            uint16_t n_valid, uint16_t n_invalid)
2654 {
2655     struct kvm_get_htab_header *buf;
2656     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2657     ssize_t rc;
2658
2659     buf = alloca(chunksize);
2660     buf->index = index;
2661     buf->n_valid = n_valid;
2662     buf->n_invalid = n_invalid;
2663
2664     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2665
2666     rc = write(fd, buf, chunksize);
2667     if (rc < 0) {
2668         fprintf(stderr, "Error writing KVM hash table: %s\n",
2669                 strerror(errno));
2670         return rc;
2671     }
2672     if (rc != chunksize) {
2673         /* We should never get a short write on a single chunk */
2674         fprintf(stderr, "Short write, restoring KVM hash table\n");
2675         return -1;
2676     }
2677     return 0;
2678 }
2679
2680 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2681 {
2682     return true;
2683 }
2684
2685 void kvm_arch_init_irq_routing(KVMState *s)
2686 {
2687 }
2688
2689 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2690 {
2691     int fd, rc;
2692     int i;
2693
2694     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2695
2696     i = 0;
2697     while (i < n) {
2698         struct kvm_get_htab_header *hdr;
2699         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2700         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2701
2702         rc = read(fd, buf, sizeof(buf));
2703         if (rc < 0) {
2704             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2705         }
2706
2707         hdr = (struct kvm_get_htab_header *)buf;
2708         while ((i < n) && ((char *)hdr < (buf + rc))) {
2709             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2710
2711             if (hdr->index != (ptex + i)) {
2712                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2713                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2714             }
2715
2716             if (n - i < valid) {
2717                 valid = n - i;
2718             }
2719             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2720             i += valid;
2721
2722             if ((n - i) < invalid) {
2723                 invalid = n - i;
2724             }
2725             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2726             i += invalid;
2727
2728             hdr = (struct kvm_get_htab_header *)
2729                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2730         }
2731     }
2732
2733     close(fd);
2734 }
2735
2736 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2737 {
2738     int fd, rc;
2739     struct {
2740         struct kvm_get_htab_header hdr;
2741         uint64_t pte0;
2742         uint64_t pte1;
2743     } buf;
2744
2745     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2746
2747     buf.hdr.n_valid = 1;
2748     buf.hdr.n_invalid = 0;
2749     buf.hdr.index = ptex;
2750     buf.pte0 = cpu_to_be64(pte0);
2751     buf.pte1 = cpu_to_be64(pte1);
2752
2753     rc = write(fd, &buf, sizeof(buf));
2754     if (rc != sizeof(buf)) {
2755         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2756     }
2757     close(fd);
2758 }
2759
2760 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2761                              uint64_t address, uint32_t data, PCIDevice *dev)
2762 {
2763     return 0;
2764 }
2765
2766 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2767                                 int vector, PCIDevice *dev)
2768 {
2769     return 0;
2770 }
2771
/* Does nothing; always returns 0 and ignores @virq. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
2776
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t low16 = data & 0xffff;

    return low16;
}
2781
2782 int kvmppc_enable_hwrng(void)
2783 {
2784     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2785         return -1;
2786     }
2787
2788     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2789 }
2790
2791 void kvmppc_check_papr_resize_hpt(Error **errp)
2792 {
2793     if (!kvm_enabled()) {
2794         return; /* No KVM, we're good */
2795     }
2796
2797     if (cap_resize_hpt) {
2798         return; /* Kernel has explicit support, we're good */
2799     }
2800
2801     /* Otherwise fallback on looking for PR KVM */
2802     if (kvmppc_is_pr(kvm_state)) {
2803         return;
2804     }
2805
2806     error_setg(errp,
2807                "Hash page table resizing not available with this KVM version");
2808 }
2809
2810 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2811 {
2812     CPUState *cs = CPU(cpu);
2813     struct kvm_ppc_resize_hpt rhpt = {
2814         .flags = flags,
2815         .shift = shift,
2816     };
2817
2818     if (!cap_resize_hpt) {
2819         return -ENOSYS;
2820     }
2821
2822     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2823 }
2824
2825 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2826 {
2827     CPUState *cs = CPU(cpu);
2828     struct kvm_ppc_resize_hpt rhpt = {
2829         .flags = flags,
2830         .shift = shift,
2831     };
2832
2833     if (!cap_resize_hpt) {
2834         return -ENOSYS;
2835     }
2836
2837     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2838 }
2839
2840 /*
2841  * This is a helper function to detect a post migration scenario
2842  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2843  * the guest kernel can't handle a PVR value other than the actual host
2844  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2845  *
2846  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2847  * (so, we're HV), return true. The workaround itself is done in
2848  * cpu_post_load.
2849  *
2850  * The order here is important: we'll only check for KVM PR as a
2851  * fallback if the guest kernel can't handle the situation itself.
2852  * We need to avoid as much as possible querying the running KVM type
2853  * in QEMU level.
2854  */
2855 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2856 {
2857     CPUState *cs = CPU(cpu);
2858
2859     if (!kvm_enabled()) {
2860         return false;
2861     }
2862
2863     if (cap_ppc_pvr_compat) {
2864         return false;
2865     }
2866
2867     return !kvmppc_is_pr(cs->kvm_state);
2868 }