2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
45 #include "qemu/cutils.h"
50 #define DPRINTF(fmt, ...) \
51 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
53 #define DPRINTF(fmt, ...) \
57 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
59 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
63 static int cap_interrupt_unset = false;
64 static int cap_interrupt_level = false;
65 static int cap_segstate;
66 static int cap_booke_sregs;
67 static int cap_ppc_smt;
68 static int cap_ppc_rma;
69 static int cap_spapr_tce;
70 static int cap_spapr_multitce;
71 static int cap_spapr_vfio;
73 static int cap_one_reg;
75 static int cap_ppc_watchdog;
77 static int cap_htab_fd;
78 static int cap_fixup_hcalls;
80 static uint32_t debug_inst_opcode;
82 /* XXX We have a race condition where we actually have a level triggered
83 * interrupt, but the infrastructure can't expose that yet, so the guest
84 * takes the interrupt but ignores it, goes to sleep and never gets notified that there's
85 * still an interrupt pending.
87 * As a quick workaround, let's just wake up again 20 ms after we injected
88 * an interrupt. That way we can ensure that we're always reinjecting
89 * interrupts in case the guest swallowed them.
91 static QEMUTimer *idle_timer;
93 static void kvm_kick_cpu(void *opaque)
95 PowerPCCPU *cpu = opaque;
97 qemu_cpu_kick(CPU(cpu));
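/*
 * Illustrative sketch of the workaround described above (the real call
 * site is in kvm_arch_pre_run() further down): after injecting an
 * interrupt we arm idle_timer for roughly 20 ms,
 *
 *     timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 *               (NANOSECONDS_PER_SECOND / 50));
 *
 * so kvm_kick_cpu() runs shortly afterwards and pokes the vCPU, giving a
 * swallowed level-triggered interrupt another chance to be noticed.
 */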
100 static int kvm_ppc_register_host_cpu_type(void);
102 int kvm_arch_init(MachineState *ms, KVMState *s)
104 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
105 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
106 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
107 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
108 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
109 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
110 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
111 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
112 cap_spapr_vfio = false;
113 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
114 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
115 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
116 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
117 /* Note: we don't set cap_papr here, because this capability is
118 * only activated after this by kvmppc_set_papr() */
119 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
120 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
122 if (!cap_interrupt_level) {
123 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
124 "VM to stall at times!\n");
127 kvm_ppc_register_host_cpu_type();
132 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
134 CPUPPCState *cenv = &cpu->env;
135 CPUState *cs = CPU(cpu);
136 struct kvm_sregs sregs;
139 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
140 /* What we're really trying to say is "if we're on BookE, we use
141 the native PVR for now". This is the only sane way to check
142 it though, so we potentially give users the impression that they can run
143 BookE guests on BookS. Let's hope nobody dares to try :) */
147 fprintf(stderr, "kvm error: missing PVR setting capability\n");
152 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
157 sregs.pvr = cenv->spr[SPR_PVR];
158 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
161 /* Set up a shared TLB array with KVM */
162 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
164 CPUPPCState *env = &cpu->env;
165 CPUState *cs = CPU(cpu);
166 struct kvm_book3e_206_tlb_params params = {};
167 struct kvm_config_tlb cfg = {};
168 unsigned int entries = 0;
171 if (!kvm_enabled() ||
172 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
176 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
178 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
179 params.tlb_sizes[i] = booke206_tlb_size(env, i);
180 params.tlb_ways[i] = booke206_tlb_ways(env, i);
181 entries += params.tlb_sizes[i];
184 assert(entries == env->nb_tlb);
185 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
187 env->tlb_dirty = true;
189 cfg.array = (uintptr_t)env->tlb.tlbm;
190 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
191 cfg.params = (uintptr_t)&params;
192 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
194 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
196 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
197 __func__, strerror(-ret));
201 env->kvm_sw_tlb = true;
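/*
 * Note: KVM_CAP_SW_TLB makes the kernel and QEMU share env->tlb.tlbm
 * directly. QEMU only needs to tell KVM when its copy has changed, which
 * kvm_sw_tlb_put() below does by flagging every entry dirty through the
 * KVM_DIRTY_TLB ioctl.
 */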
206 #if defined(TARGET_PPC64)
207 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
208 struct kvm_ppc_smmu_info *info)
210 CPUPPCState *env = &cpu->env;
211 CPUState *cs = CPU(cpu);
213 memset(info, 0, sizeof(*info));
215 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
216 * we need to "guess" what the supported page sizes are.
218 * For that to work we make a few assumptions:
220 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
221 * KVM which only supports 4K and 16M pages, but supports them
222 * regardless of the backing store characteristics. We also don't
223 * support 1T segments.
225 * This is safe as if HV KVM ever supports that capability or PR
226 * KVM grows support for more page/segment sizes, those versions
227 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
228 * will not hit this fallback
230 * - Else we are running HV KVM. This means we only support page
231 * sizes that fit in the backing store. Additionally we only
232 * advertise 64K pages if the processor is ARCH 2.06 and we assume
233 * P7 encodings for the SLB and hash table. Here too, we assume
234 * support for any newer processor will mean a kernel that
235 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
238 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
243 /* Standard 4k base page size segment */
244 info->sps[0].page_shift = 12;
245 info->sps[0].slb_enc = 0;
246 info->sps[0].enc[0].page_shift = 12;
247 info->sps[0].enc[0].pte_enc = 0;
249 /* Standard 16M large page size segment */
250 info->sps[1].page_shift = 24;
251 info->sps[1].slb_enc = SLB_VSID_L;
252 info->sps[1].enc[0].page_shift = 24;
253 info->sps[1].enc[0].pte_enc = 0;
257 /* HV KVM has backing store size restrictions */
258 info->flags = KVM_PPC_PAGE_SIZES_REAL;
260 if (env->mmu_model & POWERPC_MMU_1TSEG) {
261 info->flags |= KVM_PPC_1T_SEGMENTS;
264 if (env->mmu_model == POWERPC_MMU_2_06 ||
265 env->mmu_model == POWERPC_MMU_2_07) {
271 /* Standard 4k base page size segment */
272 info->sps[i].page_shift = 12;
273 info->sps[i].slb_enc = 0;
274 info->sps[i].enc[0].page_shift = 12;
275 info->sps[i].enc[0].pte_enc = 0;
278 /* 64K on MMU 2.06 and later */
279 if (env->mmu_model == POWERPC_MMU_2_06 ||
280 env->mmu_model == POWERPC_MMU_2_07) {
281 info->sps[i].page_shift = 16;
282 info->sps[i].slb_enc = 0x110;
283 info->sps[i].enc[0].page_shift = 16;
284 info->sps[i].enc[0].pte_enc = 1;
288 /* Standard 16M large page size segment */
289 info->sps[i].page_shift = 24;
290 info->sps[i].slb_enc = SLB_VSID_L;
291 info->sps[i].enc[0].page_shift = 24;
292 info->sps[i].enc[0].pte_enc = 0;
296 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
298 CPUState *cs = CPU(cpu);
301 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
302 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
308 kvm_get_fallback_smmu_info(cpu, info);
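/*
 * Minimal usage sketch (both real callers live in this file:
 * kvm_fixup_page_sizes() trims env->sps with the result, and
 * kvmppc_rma_size() uses it to bound the RMA):
 *
 *     struct kvm_ppc_smmu_info info;
 *     kvm_get_smmu_info(cpu, &info);
 *     if (!(info.flags & KVM_PPC_1T_SEGMENTS)) {
 *         ... 1T segments are not available on this host ...
 *     }
 */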
311 static long gethugepagesize(const char *mem_path)
317 ret = statfs(mem_path, &fs);
318 } while (ret != 0 && errno == EINTR);
321 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
326 #define HUGETLBFS_MAGIC 0x958458f6
328 if (fs.f_type != HUGETLBFS_MAGIC) {
329 /* Explicit mempath, but it's ordinary pages */
330 return getpagesize();
333 /* It's a hugepage, return the huge page size */
337 static int find_max_supported_pagesize(Object *obj, void *opaque)
340 long *hpsize_min = opaque;
342 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
343 mem_path = object_property_get_str(obj, "mem-path", NULL);
345 long hpsize = gethugepagesize(mem_path);
346 if (hpsize < *hpsize_min) {
347 *hpsize_min = hpsize;
350 *hpsize_min = getpagesize();
357 static long getrampagesize(void)
359 long hpsize = LONG_MAX;
363 return gethugepagesize(mem_path);
366 /* it's possible we have memory-backend objects with
367 * hugepage-backed RAM. these may get mapped into system
368 * address space via -numa parameters or memory hotplug
369 * hooks. we want to take these into account, but we
370 * also want to make sure these supported hugepage
371 * sizes are applicable across the entire range of memory
372 * we may boot from, so we take the min across all
373 * backends, and assume normal pages in cases where a
374 * backend isn't backed by hugepages.
376 memdev_root = object_resolve_path("/objects", NULL);
378 return getpagesize();
381 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
383 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
386 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
388 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
392 return (1ul << shift) <= rampgsize;
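/*
 * Worked example: under HV KVM, flags has KVM_PPC_PAGE_SIZES_REAL set, so
 * a 16M page size (shift == 24) is only accepted when guest RAM is backed
 * by pages of at least 16M; with 4K or 64K backing pages,
 * (1ul << 24) > rampgsize and the size is rejected. Without
 * KVM_PPC_PAGE_SIZES_REAL (the PR KVM case), every size is accepted
 * regardless of the backing store.
 */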
395 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
397 static struct kvm_ppc_smmu_info smmu_info;
398 static bool has_smmu_info;
399 CPUPPCState *env = &cpu->env;
403 /* We only handle page sizes for 64-bit server guests for now */
404 if (!(env->mmu_model & POWERPC_MMU_64)) {
408 /* Collect MMU info from kernel if not already */
409 if (!has_smmu_info) {
410 kvm_get_smmu_info(cpu, &smmu_info);
411 has_smmu_info = true;
414 rampagesize = getrampagesize();
416 /* Convert to QEMU form */
417 memset(&env->sps, 0, sizeof(env->sps));
419 /* If we have HV KVM, we need to forbid CI large pages if our
420 * host page size is smaller than 64K.
422 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
423 env->ci_large_pages = getpagesize() >= 0x10000;
427 * XXX This loop should be an entry wide AND of the capabilities that
428 * the selected CPU has with the capabilities that KVM supports.
430 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
431 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
432 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
434 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
438 qsps->page_shift = ksps->page_shift;
439 qsps->slb_enc = ksps->slb_enc;
440 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
441 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
442 ksps->enc[jk].page_shift)) {
445 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
446 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
447 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
451 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
455 env->slb_nr = smmu_info.slb_size;
456 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
457 env->mmu_model &= ~POWERPC_MMU_1TSEG;
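/*
 * Net effect: after kvm_fixup_page_sizes() runs, env->sps only lists the
 * segment/page size combinations the host can really provide. For
 * example, with HV KVM and guest RAM backed by 4K host pages, the 64K and
 * 16M encodings fail kvm_valid_page_size() and are dropped, leaving only
 * the 4K entries.
 */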
460 #else /* defined (TARGET_PPC64) */
462 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
466 #endif /* !defined (TARGET_PPC64) */
468 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
470 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
473 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
474 * book3s supports only 1 watchpoint, so an array size
475 * of 4 is sufficient for now.
477 #define MAX_HW_BKPTS 4
479 static struct HWBreakpoint {
482 } hw_debug_points[MAX_HW_BKPTS];
484 static CPUWatchpoint hw_watchpoint;
486 /* By default no breakpoints or watchpoints are supported */
487 static int max_hw_breakpoint;
488 static int max_hw_watchpoint;
489 static int nb_hw_breakpoint;
490 static int nb_hw_watchpoint;
492 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
494 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
495 max_hw_breakpoint = 2;
496 max_hw_watchpoint = 2;
499 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
500 fprintf(stderr, "Error initializing h/w breakpoints\n");
505 int kvm_arch_init_vcpu(CPUState *cs)
507 PowerPCCPU *cpu = POWERPC_CPU(cs);
508 CPUPPCState *cenv = &cpu->env;
511 /* Gather server mmu info from KVM and update the CPU state */
512 kvm_fixup_page_sizes(cpu);
514 /* Synchronize sregs with kvm */
515 ret = kvm_arch_sync_sregs(cpu);
517 if (ret == -EINVAL) {
518 error_report("Register sync failed... If you're using kvm-hv.ko,"
519 " only \"-cpu host\" is possible");
524 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
526 /* Some targets support access to KVM's guest TLB. */
527 switch (cenv->mmu_model) {
528 case POWERPC_MMU_BOOKE206:
529 ret = kvm_booke206_tlb_init(cpu);
535 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
536 kvmppc_hw_debug_points_init(cenv);
541 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
543 CPUPPCState *env = &cpu->env;
544 CPUState *cs = CPU(cpu);
545 struct kvm_dirty_tlb dirty_tlb;
546 unsigned char *bitmap;
549 if (!env->kvm_sw_tlb) {
553 bitmap = g_malloc((env->nb_tlb + 7) / 8);
554 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
556 dirty_tlb.bitmap = (uintptr_t)bitmap;
557 dirty_tlb.num_dirty = env->nb_tlb;
559 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
561 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
562 __func__, strerror(-ret));
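/*
 * Note: the bitmap above is (nb_tlb + 7) / 8 bytes with every bit set,
 * i.e. kvm_sw_tlb_put() always marks the complete TLB dirty so that
 * KVM_DIRTY_TLB re-reads the whole shared array instead of tracking
 * individual entries.
 */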
568 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
570 PowerPCCPU *cpu = POWERPC_CPU(cs);
571 CPUPPCState *env = &cpu->env;
576 struct kvm_one_reg reg = {
578 .addr = (uintptr_t) &val,
582 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
584 trace_kvm_failed_spr_get(spr, strerror(errno));
586 switch (id & KVM_REG_SIZE_MASK) {
587 case KVM_REG_SIZE_U32:
588 env->spr[spr] = val.u32;
591 case KVM_REG_SIZE_U64:
592 env->spr[spr] = val.u64;
596 /* Don't handle this size yet */
602 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
604 PowerPCCPU *cpu = POWERPC_CPU(cs);
605 CPUPPCState *env = &cpu->env;
610 struct kvm_one_reg reg = {
612 .addr = (uintptr_t) &val,
616 switch (id & KVM_REG_SIZE_MASK) {
617 case KVM_REG_SIZE_U32:
618 val.u32 = env->spr[spr];
621 case KVM_REG_SIZE_U64:
622 val.u64 = env->spr[spr];
626 /* Don't handle this size yet */
630 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
632 trace_kvm_failed_spr_set(spr, strerror(errno));
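/*
 * Usage sketch (the real calls appear in kvm_arch_put_registers() and
 * kvm_arch_get_registers() below):
 *
 *     kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 *     kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 *
 * The ONE_REG id encodes the register width, which is why both helpers
 * switch on KVM_REG_SIZE_MASK to pick the u32 or u64 member of the union.
 */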
636 static int kvm_put_fp(CPUState *cs)
638 PowerPCCPU *cpu = POWERPC_CPU(cs);
639 CPUPPCState *env = &cpu->env;
640 struct kvm_one_reg reg;
644 if (env->insns_flags & PPC_FLOAT) {
645 uint64_t fpscr = env->fpscr;
646 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
648 reg.id = KVM_REG_PPC_FPSCR;
649 reg.addr = (uintptr_t)&fpscr;
650 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
652 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
656 for (i = 0; i < 32; i++) {
659 #ifdef HOST_WORDS_BIGENDIAN
660 vsr[0] = float64_val(env->fpr[i]);
661 vsr[1] = env->vsr[i];
663 vsr[0] = env->vsr[i];
664 vsr[1] = float64_val(env->fpr[i]);
666 reg.addr = (uintptr_t) &vsr;
667 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
669 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
671 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
678 if (env->insns_flags & PPC_ALTIVEC) {
679 reg.id = KVM_REG_PPC_VSCR;
680 reg.addr = (uintptr_t)&env->vscr;
681 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
683 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
687 for (i = 0; i < 32; i++) {
688 reg.id = KVM_REG_PPC_VR(i);
689 reg.addr = (uintptr_t)&env->avr[i];
690 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
692 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
701 static int kvm_get_fp(CPUState *cs)
703 PowerPCCPU *cpu = POWERPC_CPU(cs);
704 CPUPPCState *env = &cpu->env;
705 struct kvm_one_reg reg;
709 if (env->insns_flags & PPC_FLOAT) {
711 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
713 reg.id = KVM_REG_PPC_FPSCR;
714 reg.addr = (uintptr_t)&fpscr;
715 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
717 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
723 for (i = 0; i < 32; i++) {
726 reg.addr = (uintptr_t) &vsr;
727 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
729 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
731 DPRINTF("Unable to get %s%d from KVM: %s\n",
732 vsx ? "VSR" : "FPR", i, strerror(errno));
735 #ifdef HOST_WORDS_BIGENDIAN
736 env->fpr[i] = vsr[0];
738 env->vsr[i] = vsr[1];
741 env->fpr[i] = vsr[1];
743 env->vsr[i] = vsr[0];
750 if (env->insns_flags & PPC_ALTIVEC) {
751 reg.id = KVM_REG_PPC_VSCR;
752 reg.addr = (uintptr_t)&env->vscr;
753 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
755 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
759 for (i = 0; i < 32; i++) {
760 reg.id = KVM_REG_PPC_VR(i);
761 reg.addr = (uintptr_t)&env->avr[i];
762 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
764 DPRINTF("Unable to get VR%d from KVM: %s\n",
774 #if defined(TARGET_PPC64)
775 static int kvm_get_vpa(CPUState *cs)
777 PowerPCCPU *cpu = POWERPC_CPU(cs);
778 CPUPPCState *env = &cpu->env;
779 struct kvm_one_reg reg;
782 reg.id = KVM_REG_PPC_VPA_ADDR;
783 reg.addr = (uintptr_t)&env->vpa_addr;
784 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
786 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
790 assert((uintptr_t)&env->slb_shadow_size
791 == ((uintptr_t)&env->slb_shadow_addr + 8));
792 reg.id = KVM_REG_PPC_VPA_SLB;
793 reg.addr = (uintptr_t)&env->slb_shadow_addr;
794 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
796 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
801 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
802 reg.id = KVM_REG_PPC_VPA_DTL;
803 reg.addr = (uintptr_t)&env->dtl_addr;
804 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
806 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
814 static int kvm_put_vpa(CPUState *cs)
816 PowerPCCPU *cpu = POWERPC_CPU(cs);
817 CPUPPCState *env = &cpu->env;
818 struct kvm_one_reg reg;
821 /* SLB shadow or DTL can't be registered unless a master VPA is
822 * registered. That means when restoring state, if a VPA *is*
823 * registered, we need to set that up first. If not, we need to
824 * deregister the others before deregistering the master VPA */
825 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
828 reg.id = KVM_REG_PPC_VPA_ADDR;
829 reg.addr = (uintptr_t)&env->vpa_addr;
830 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
832 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
837 assert((uintptr_t)&env->slb_shadow_size
838 == ((uintptr_t)&env->slb_shadow_addr + 8));
839 reg.id = KVM_REG_PPC_VPA_SLB;
840 reg.addr = (uintptr_t)&env->slb_shadow_addr;
841 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
843 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
847 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
848 reg.id = KVM_REG_PPC_VPA_DTL;
849 reg.addr = (uintptr_t)&env->dtl_addr;
850 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
852 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
857 if (!env->vpa_addr) {
858 reg.id = KVM_REG_PPC_VPA_ADDR;
859 reg.addr = (uintptr_t)&env->vpa_addr;
860 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
862 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
869 #endif /* TARGET_PPC64 */
871 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
873 CPUPPCState *env = &cpu->env;
874 struct kvm_sregs sregs;
877 sregs.pvr = env->spr[SPR_PVR];
879 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
883 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
884 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
885 if (env->slb[i].esid & SLB_ESID_V) {
886 sregs.u.s.ppc64.slb[i].slbe |= i;
888 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
893 for (i = 0; i < 16; i++) {
894 sregs.u.s.ppc32.sr[i] = env->sr[i];
898 for (i = 0; i < 8; i++) {
899 /* Beware. We have to swap upper and lower bits here */
900 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
902 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
906 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
909 int kvm_arch_put_registers(CPUState *cs, int level)
911 PowerPCCPU *cpu = POWERPC_CPU(cs);
912 CPUPPCState *env = &cpu->env;
913 struct kvm_regs regs;
917 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
924 regs.xer = cpu_read_xer(env);
928 regs.srr0 = env->spr[SPR_SRR0];
929 regs.srr1 = env->spr[SPR_SRR1];
931 regs.sprg0 = env->spr[SPR_SPRG0];
932 regs.sprg1 = env->spr[SPR_SPRG1];
933 regs.sprg2 = env->spr[SPR_SPRG2];
934 regs.sprg3 = env->spr[SPR_SPRG3];
935 regs.sprg4 = env->spr[SPR_SPRG4];
936 regs.sprg5 = env->spr[SPR_SPRG5];
937 regs.sprg6 = env->spr[SPR_SPRG6];
938 regs.sprg7 = env->spr[SPR_SPRG7];
940 regs.pid = env->spr[SPR_BOOKE_PID];
942 for (i = 0;i < 32; i++)
943 regs.gpr[i] = env->gpr[i];
946 for (i = 0; i < 8; i++) {
947 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
950 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
956 if (env->tlb_dirty) {
958 env->tlb_dirty = false;
961 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
962 ret = kvmppc_put_books_sregs(cpu);
968 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
969 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
975 /* We deliberately ignore errors here: for kernels which have
976 * the ONE_REG calls but don't support the specific
977 * registers, there's a reasonable chance things will still
978 * work, at least until we try to migrate. */
979 for (i = 0; i < 1024; i++) {
980 uint64_t id = env->spr_cb[i].one_reg_id;
983 kvm_put_one_spr(cs, id, i);
989 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
992 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
993 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
995 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
996 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
997 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
998 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
999 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1000 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1001 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1002 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1004 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1008 if (kvm_put_vpa(cs) < 0) {
1009 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1013 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1014 #endif /* TARGET_PPC64 */
1020 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1022 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1025 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1027 CPUPPCState *env = &cpu->env;
1028 struct kvm_sregs sregs;
1031 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1036 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1037 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1038 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1039 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1040 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1041 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1042 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1043 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1044 env->spr[SPR_DECR] = sregs.u.e.dec;
1045 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1046 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1047 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1050 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1051 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1052 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1053 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1054 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1055 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1058 if (sregs.u.e.features & KVM_SREGS_E_64) {
1059 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1062 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1063 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1066 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1067 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1068 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1069 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1070 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1071 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1072 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1073 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1074 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1075 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1076 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1077 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1078 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1079 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1080 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1081 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1082 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1083 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1084 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1085 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1086 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1087 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1088 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1089 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1090 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1091 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1092 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1093 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1094 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1095 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1096 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1097 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1098 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1100 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1101 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1102 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1103 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1104 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1105 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1106 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1109 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1110 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1111 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1114 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1115 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1116 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1117 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1118 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1122 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1123 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1124 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1125 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1126 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1127 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1128 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1129 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1130 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1131 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1132 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1135 if (sregs.u.e.features & KVM_SREGS_EXP) {
1136 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1139 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1140 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1141 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1144 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1145 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1146 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1147 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1149 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1150 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1151 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1158 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1160 CPUPPCState *env = &cpu->env;
1161 struct kvm_sregs sregs;
1165 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1170 if (!env->external_htab) {
1171 ppc_store_sdr1(env, sregs.u.s.sdr1);
1177 * The packed SLB array we get from KVM_GET_SREGS only contains
1178 * information about valid entries. So we flush our internal copy
1179 * to get rid of stale ones, then put all valid SLB entries back
1182 memset(env->slb, 0, sizeof(env->slb));
1183 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1184 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1185 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1187 * Only restore valid entries
1189 if (rb & SLB_ESID_V) {
1190 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1196 for (i = 0; i < 16; i++) {
1197 env->sr[i] = sregs.u.s.ppc32.sr[i];
1201 for (i = 0; i < 8; i++) {
1202 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1203 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1204 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1205 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1211 int kvm_arch_get_registers(CPUState *cs)
1213 PowerPCCPU *cpu = POWERPC_CPU(cs);
1214 CPUPPCState *env = &cpu->env;
1215 struct kvm_regs regs;
1219 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1224 for (i = 7; i >= 0; i--) {
1225 env->crf[i] = cr & 15;
1229 env->ctr = regs.ctr;
1231 cpu_write_xer(env, regs.xer);
1232 env->msr = regs.msr;
1235 env->spr[SPR_SRR0] = regs.srr0;
1236 env->spr[SPR_SRR1] = regs.srr1;
1238 env->spr[SPR_SPRG0] = regs.sprg0;
1239 env->spr[SPR_SPRG1] = regs.sprg1;
1240 env->spr[SPR_SPRG2] = regs.sprg2;
1241 env->spr[SPR_SPRG3] = regs.sprg3;
1242 env->spr[SPR_SPRG4] = regs.sprg4;
1243 env->spr[SPR_SPRG5] = regs.sprg5;
1244 env->spr[SPR_SPRG6] = regs.sprg6;
1245 env->spr[SPR_SPRG7] = regs.sprg7;
1247 env->spr[SPR_BOOKE_PID] = regs.pid;
1249 for (i = 0;i < 32; i++)
1250 env->gpr[i] = regs.gpr[i];
1254 if (cap_booke_sregs) {
1255 ret = kvmppc_get_booke_sregs(cpu);
1262 ret = kvmppc_get_books_sregs(cpu);
1269 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1275 /* We deliberately ignore errors here: for kernels which have
1276 * the ONE_REG calls but don't support the specific
1277 * registers, there's a reasonable chance things will still
1278 * work, at least until we try to migrate. */
1279 for (i = 0; i < 1024; i++) {
1280 uint64_t id = env->spr_cb[i].one_reg_id;
1283 kvm_get_one_spr(cs, id, i);
1289 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1292 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1293 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1295 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1296 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1297 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1298 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1299 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1300 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1301 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1302 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1303 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1304 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1308 if (kvm_get_vpa(cs) < 0) {
1309 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1313 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1320 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1322 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1324 if (irq != PPC_INTERRUPT_EXT) {
1328 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1332 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1337 #if defined(TARGET_PPCEMB)
1338 #define PPC_INPUT_INT PPC40x_INPUT_INT
1339 #elif defined(TARGET_PPC64)
1340 #define PPC_INPUT_INT PPC970_INPUT_INT
1342 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1345 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1347 PowerPCCPU *cpu = POWERPC_CPU(cs);
1348 CPUPPCState *env = &cpu->env;
1352 qemu_mutex_lock_iothread();
1354 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1355 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1356 if (!cap_interrupt_level &&
1357 run->ready_for_interrupt_injection &&
1358 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1359 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1361 /* For now KVM disregards the 'irq' argument. However, in the
1362 * future KVM could cache it in-kernel to avoid a heavyweight exit
1363 * when reading the UIC.
1365 irq = KVM_INTERRUPT_SET;
1367 DPRINTF("injected interrupt %d\n", irq);
1368 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1370 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1373 /* Always wake up soon in case the interrupt was level based */
1374 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1375 (NANOSECONDS_PER_SECOND / 50));
1378 /* We don't know if there are more interrupts pending after this. However,
1379 * the guest will return to userspace in the course of handling this one
1380 * anyway, so we will get a chance to deliver the rest. */
1382 qemu_mutex_unlock_iothread();
1385 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1387 return MEMTXATTRS_UNSPECIFIED;
1390 int kvm_arch_process_async_events(CPUState *cs)
1395 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1397 CPUState *cs = CPU(cpu);
1398 CPUPPCState *env = &cpu->env;
1400 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1402 cs->exception_index = EXCP_HLT;
1408 /* map dcr access to existing qemu dcr emulation */
1409 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1411 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1412 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1417 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1419 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1420 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1425 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1427 /* Mixed endian case is not handled */
1428 uint32_t sc = debug_inst_opcode;
1430 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1432 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1439 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1443 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1444 sc != debug_inst_opcode ||
1445 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1453 static int find_hw_breakpoint(target_ulong addr, int type)
1457 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1458 <= ARRAY_SIZE(hw_debug_points));
1460 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1461 if (hw_debug_points[n].addr == addr &&
1462 hw_debug_points[n].type == type) {
1470 static int find_hw_watchpoint(target_ulong addr, int *flag)
1474 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1476 *flag = BP_MEM_ACCESS;
1480 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1482 *flag = BP_MEM_WRITE;
1486 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1488 *flag = BP_MEM_READ;
1495 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1496 target_ulong len, int type)
1498 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1502 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1503 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1506 case GDB_BREAKPOINT_HW:
1507 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1511 if (find_hw_breakpoint(addr, type) >= 0) {
1518 case GDB_WATCHPOINT_WRITE:
1519 case GDB_WATCHPOINT_READ:
1520 case GDB_WATCHPOINT_ACCESS:
1521 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1525 if (find_hw_breakpoint(addr, type) >= 0) {
1539 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1540 target_ulong len, int type)
1544 n = find_hw_breakpoint(addr, type);
1550 case GDB_BREAKPOINT_HW:
1554 case GDB_WATCHPOINT_WRITE:
1555 case GDB_WATCHPOINT_READ:
1556 case GDB_WATCHPOINT_ACCESS:
1563 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
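/*
 * Note: removal keeps hw_debug_points dense by copying the last active
 * entry into the slot that was just freed, once the matching
 * nb_hw_breakpoint/nb_hw_watchpoint counter has been decremented above.
 */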
1568 void kvm_arch_remove_all_hw_breakpoints(void)
1570 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1573 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1577 /* Software Breakpoint updates */
1578 if (kvm_sw_breakpoints_active(cs)) {
1579 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1582 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1583 <= ARRAY_SIZE(hw_debug_points));
1584 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1586 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1587 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1588 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1589 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1590 switch (hw_debug_points[n].type) {
1591 case GDB_BREAKPOINT_HW:
1592 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1594 case GDB_WATCHPOINT_WRITE:
1595 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1597 case GDB_WATCHPOINT_READ:
1598 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1600 case GDB_WATCHPOINT_ACCESS:
1601 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1602 KVMPPC_DEBUG_WATCH_READ;
1605 cpu_abort(cs, "Unsupported breakpoint type\n");
1607 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1612 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1614 CPUState *cs = CPU(cpu);
1615 CPUPPCState *env = &cpu->env;
1616 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1621 if (cs->singlestep_enabled) {
1623 } else if (arch_info->status) {
1624 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1625 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1626 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1630 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1631 KVMPPC_DEBUG_WATCH_WRITE)) {
1632 n = find_hw_watchpoint(arch_info->address, &flag);
1635 cs->watchpoint_hit = &hw_watchpoint;
1636 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1637 hw_watchpoint.flags = flag;
1641 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1644 /* QEMU is not able to handle the debug exception, so inject a
1645 * program exception into the guest instead;
1646 * yes, a program exception, NOT a debug exception !!
1647 * When QEMU is using the debug resources then the debug exception must
1648 * always be set. To achieve this we set MSR_DE and also set
1649 * MSRP_DEP so the guest cannot change MSR_DE.
1650 * When emulating debug resources for the guest we want the guest
1651 * to control MSR_DE (enable/disable the debug interrupt as needed).
1652 * Supporting both configurations at once is NOT possible,
1653 * so the result is that we cannot share debug resources
1654 * between QEMU and the guest on the BookE architecture.
1655 * In the current design QEMU gets priority over the guest:
1656 * if QEMU is using the debug resources then the guest cannot use them.
1658 * For software breakpoints QEMU uses a privileged instruction, so
1659 * there is no way we can be here because the guest raised a
1660 * debug exception; the only possibility is that the guest executed a
1661 * privileged / illegal instruction, and that is why we are
1662 * injecting a program interrupt.
1665 cpu_synchronize_state(cs);
1666 /* env->nip is PC, so increment this by 4 to use
1667 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1670 cs->exception_index = POWERPC_EXCP_PROGRAM;
1671 env->error_code = POWERPC_EXCP_INVAL;
1672 ppc_cpu_do_interrupt(cs);
1678 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1680 PowerPCCPU *cpu = POWERPC_CPU(cs);
1681 CPUPPCState *env = &cpu->env;
1684 qemu_mutex_lock_iothread();
1686 switch (run->exit_reason) {
1688 if (run->dcr.is_write) {
1689 DPRINTF("handle dcr write\n");
1690 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1692 DPRINTF("handle dcr read\n");
1693 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1697 DPRINTF("handle halt\n");
1698 ret = kvmppc_handle_halt(cpu);
1700 #if defined(TARGET_PPC64)
1701 case KVM_EXIT_PAPR_HCALL:
1702 DPRINTF("handle PAPR hypercall\n");
1703 run->papr_hcall.ret = spapr_hypercall(cpu,
1705 run->papr_hcall.args);
1710 DPRINTF("handle epr\n");
1711 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1714 case KVM_EXIT_WATCHDOG:
1715 DPRINTF("handle watchdog expiry\n");
1716 watchdog_perform_action();
1720 case KVM_EXIT_DEBUG:
1721 DPRINTF("handle debug exception\n");
1722 if (kvm_handle_debug(cpu, run)) {
1726 /* re-enter, this exception was guest-internal */
1731 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1736 qemu_mutex_unlock_iothread();
1740 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1742 CPUState *cs = CPU(cpu);
1743 uint32_t bits = tsr_bits;
1744 struct kvm_one_reg reg = {
1745 .id = KVM_REG_PPC_OR_TSR,
1746 .addr = (uintptr_t) &bits,
1749 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1752 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1755 CPUState *cs = CPU(cpu);
1756 uint32_t bits = tsr_bits;
1757 struct kvm_one_reg reg = {
1758 .id = KVM_REG_PPC_CLEAR_TSR,
1759 .addr = (uintptr_t) &bits,
1762 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1765 int kvmppc_set_tcr(PowerPCCPU *cpu)
1767 CPUState *cs = CPU(cpu);
1768 CPUPPCState *env = &cpu->env;
1769 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1771 struct kvm_one_reg reg = {
1772 .id = KVM_REG_PPC_TCR,
1773 .addr = (uintptr_t) &tcr,
1776 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1779 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1781 CPUState *cs = CPU(cpu);
1784 if (!kvm_enabled()) {
1788 if (!cap_ppc_watchdog) {
1789 printf("warning: KVM does not support watchdog");
1793 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1795 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1796 __func__, strerror(-ret));
1803 static int read_cpuinfo(const char *field, char *value, int len)
1807 int field_len = strlen(field);
1810 f = fopen("/proc/cpuinfo", "r");
1816 if (!fgets(line, sizeof(line), f)) {
1819 if (!strncmp(line, field, field_len)) {
1820 pstrcpy(value, len, line);
1831 uint32_t kvmppc_get_tbfreq(void)
1835 uint32_t retval = NANOSECONDS_PER_SECOND;
1837 if (read_cpuinfo("timebase", line, sizeof(line))) {
1841 if (!(ns = strchr(line, ':'))) {
1850 bool kvmppc_get_host_serial(char **value)
1852 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1856 bool kvmppc_get_host_model(char **value)
1858 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1861 /* Try to find a device tree node for a CPU with clock-frequency property */
1862 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1864 struct dirent *dirp;
1867 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1868 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1873 while ((dirp = readdir(dp)) != NULL) {
1875 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1877 f = fopen(buf, "r");
1879 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1886 if (buf[0] == '\0') {
1887 printf("Unknown host!\n");
1894 static uint64_t kvmppc_read_int_dt(const char *filename)
1903 f = fopen(filename, "rb");
1908 len = fread(&u, 1, sizeof(u), f);
1912 /* property is a 32-bit quantity */
1913 return be32_to_cpu(u.v32);
1915 return be64_to_cpu(u.v64);
1921 /* Read a CPU node property from the host device tree that's a single
1922 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1923 * (can't find or open the property, or doesn't understand the
1925 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1927 char buf[PATH_MAX], *tmp;
1930 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1934 tmp = g_strdup_printf("%s/%s", buf, propname);
1935 val = kvmppc_read_int_dt(tmp);
1941 uint64_t kvmppc_get_clockfreq(void)
1943 return kvmppc_read_int_cpu_dt("clock-frequency");
1946 uint32_t kvmppc_get_vmx(void)
1948 return kvmppc_read_int_cpu_dt("ibm,vmx");
1951 uint32_t kvmppc_get_dfp(void)
1953 return kvmppc_read_int_cpu_dt("ibm,dfp");
1956 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1958 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1959 CPUState *cs = CPU(cpu);
1961 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1962 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1969 int kvmppc_get_hasidle(CPUPPCState *env)
1971 struct kvm_ppc_pvinfo pvinfo;
1973 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1974 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1981 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1983 uint32_t *hc = (uint32_t*)buf;
1984 struct kvm_ppc_pvinfo pvinfo;
1986 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1987 memcpy(buf, pvinfo.hcall, buf_len);
1992 * Fallback to always fail hypercalls regardless of endianness:
1994 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1996 * b .+8 (becomes nop in wrong endian)
1997 * bswap32(li r3, -1)
2000 hc[0] = cpu_to_be32(0x08000048);
2001 hc[1] = cpu_to_be32(0x3860ffff);
2002 hc[2] = cpu_to_be32(0x48000008);
2003 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
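/*
 * Walk-through of the fallback sequence above: a guest of the "right"
 * endianness executes
 *
 *     tdi 0,r0,72      ; TO field is 0, never traps, behaves as a nop
 *     li  r3,-1        ; hypercall result = -1
 *     b   .+8          ; skip the byte-swapped word below
 *
 * while a guest of the opposite endianness decodes word 0 as "b .+8",
 * skips the first "li r3,-1", executes word 2 as the harmless tdi and
 * then reaches the byte-swapped "li r3,-1" in word 3. Either way the
 * hypercall fails cleanly with -1 in r3.
 */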
2008 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2010 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2013 void kvmppc_enable_logical_ci_hcalls(void)
2016 * FIXME: it would be nice if we could detect the cases where
2017 * we're using a device which requires the in-kernel
2018 * implementation of these hcalls but the kernel lacks them, and
2019 * produce a warning in that case.
2021 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2022 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2025 void kvmppc_enable_set_mode_hcall(void)
2027 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2030 void kvmppc_set_papr(PowerPCCPU *cpu)
2032 CPUState *cs = CPU(cpu);
2035 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2037 error_report("This vCPU type or KVM version does not support PAPR");
2041 /* Update the capability flag so we sync the right information
2046 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2048 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2051 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2053 CPUState *cs = CPU(cpu);
2056 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2057 if (ret && mpic_proxy) {
2058 error_report("This KVM version does not support EPR");
2063 int kvmppc_smt_threads(void)
2065 return cap_ppc_smt ? cap_ppc_smt : 1;
2069 off_t kvmppc_alloc_rma(void **rma)
2073 struct kvm_allocate_rma ret;
2075 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2076 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2077 * not necessary on this hardware
2078 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2080 * FIXME: We should allow the user to force contiguous RMA
2081 * allocation in the cap_ppc_rma==1 case.
2083 if (cap_ppc_rma < 2) {
2087 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2089 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2094 size = MIN(ret.rma_size, 256ul << 20);
2096 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2097 if (*rma == MAP_FAILED) {
2098 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2105 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2107 struct kvm_ppc_smmu_info info;
2108 long rampagesize, best_page_shift;
2111 if (cap_ppc_rma >= 2) {
2112 return current_size;
2115 /* Find the largest hardware supported page size that's less than
2116 * or equal to the (logical) backing page size of guest RAM */
2117 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2118 rampagesize = getrampagesize();
2119 best_page_shift = 0;
2121 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2122 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2124 if (!sps->page_shift) {
2128 if ((sps->page_shift > best_page_shift)
2129 && ((1UL << sps->page_shift) <= rampagesize)) {
2130 best_page_shift = sps->page_shift;
2134 return MIN(current_size,
2135 1ULL << (best_page_shift + hash_shift - 7));
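/*
 * Worked example: with 4K as the best matching page size
 * (best_page_shift == 12) and a 16MB hash table (hash_shift == 24), the
 * cap is 1ULL << (12 + 24 - 7) = 512 MiB, so the RMA becomes the smaller
 * of current_size and 512 MiB.
 */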
2139 bool kvmppc_spapr_use_multitce(void)
2141 return cap_spapr_multitce;
2144 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2147 struct kvm_create_spapr_tce args = {
2149 .window_size = window_size,
2155 /* Must set fd to -1 so we don't try to munmap when called for
2156 * destroying the table, which the upper layers -will- do
2159 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2163 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2165 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2170 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2171 /* FIXME: round this up to page size */
2173 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2174 if (table == MAP_FAILED) {
2175 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2185 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2193 len = nb_table * sizeof(uint64_t);
2194 if ((munmap(table, len) < 0) ||
2196 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2198 /* Leak the table */
2204 int kvmppc_reset_htab(int shift_hint)
2206 uint32_t shift = shift_hint;
2208 if (!kvm_enabled()) {
2209 /* Full emulation, tell caller to allocate htab itself */
2212 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2214 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2215 if (ret == -ENOTTY) {
2216 /* At least some versions of PR KVM advertise the
2217 * capability, but don't implement the ioctl(). Oops.
2218 * Return 0 so that we allocate the htab in qemu, as is
2219 * correct for PR. */
2221 } else if (ret < 0) {
2227 /* We have a kernel that predates the htab reset calls. For PR
2228 * KVM, we need to allocate the htab ourselves; an HV KVM of
2229 * this era has already allocated a 16MB fixed size hash table.
2230 * Kernels of this era have the GET_PVINFO capability
2231 * only on PR, so we use this hack to determine the right
2233 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2234 /* PR - tell caller to allocate htab */
2237 /* HV - assume 16MB kernel allocated htab */
2242 static inline uint32_t mfpvr(void)
2251 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2260 static void kvmppc_host_cpu_initfn(Object *obj)
2262 assert(kvm_enabled());
2265 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2267 DeviceClass *dc = DEVICE_CLASS(oc);
2268 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2269 uint32_t vmx = kvmppc_get_vmx();
2270 uint32_t dfp = kvmppc_get_dfp();
2271 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2272 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2274 /* Now fix up the class with information we can query from the host */
2278 /* Only override when we know what the host supports */
2279 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2280 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2283 /* Only override when we know what the host supports */
2284 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2287 if (dcache_size != -1) {
2288 pcc->l1_dcache_size = dcache_size;
2291 if (icache_size != -1) {
2292 pcc->l1_icache_size = icache_size;
2295 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2296 dc->cannot_destroy_with_object_finalize_yet = true;
2299 bool kvmppc_has_cap_epr(void)
2304 bool kvmppc_has_cap_htab_fd(void)
2309 bool kvmppc_has_cap_fixup_hcalls(void)
2311 return cap_fixup_hcalls;
2314 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2316 ObjectClass *oc = OBJECT_CLASS(pcc);
2318 while (oc && !object_class_is_abstract(oc)) {
2319 oc = object_class_get_parent(oc);
2323 return POWERPC_CPU_CLASS(oc);
2326 static int kvm_ppc_register_host_cpu_type(void)
2328 TypeInfo type_info = {
2329 .name = TYPE_HOST_POWERPC_CPU,
2330 .instance_init = kvmppc_host_cpu_initfn,
2331 .class_init = kvmppc_host_cpu_class_init,
2333 uint32_t host_pvr = mfpvr();
2334 PowerPCCPUClass *pvr_pcc;
2337 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2338 if (pvr_pcc == NULL) {
2339 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2341 if (pvr_pcc == NULL) {
2344 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2345 type_register(&type_info);
2347 /* Also register a generic CPU class for this CPU family */
2348 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2349 dc = DEVICE_CLASS(pvr_pcc);
2350 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2351 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2352 type_register(&type_info);
2357 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2359 struct kvm_rtas_token_args args = {
2363 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2367 strncpy(args.name, function, sizeof(args.name));
2369 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2372 int kvmppc_get_htab_fd(bool write)
2374 struct kvm_get_htab_fd s = {
2375 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2380 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2384 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2387 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2389 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2390 uint8_t buf[bufsize];
2394 rc = read(fd, buf, bufsize);
2396 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2400 uint8_t *buffer = buf;
2403 struct kvm_get_htab_header *head =
2404 (struct kvm_get_htab_header *) buffer;
2405 size_t chunksize = sizeof(*head) +
2406 HASH_PTE_SIZE_64 * head->n_valid;
2408 qemu_put_be32(f, head->index);
2409 qemu_put_be16(f, head->n_valid);
2410 qemu_put_be16(f, head->n_invalid);
2411 qemu_put_buffer(f, (void *)(head + 1),
2412 HASH_PTE_SIZE_64 * head->n_valid);
2414 buffer += chunksize;
2420 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2422 return (rc == 0) ? 1 : 0;
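/*
 * Stream layout note: each chunk written above is a 32-bit index, a
 * 16-bit n_valid and a 16-bit n_invalid, followed by n_valid HPTEs of
 * HASH_PTE_SIZE_64 bytes each. kvmppc_load_htab_chunk() below rebuilds
 * exactly this layout in a kvm_get_htab_header before handing it back to
 * the kernel HTAB fd with write().
 */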
2425 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2426 uint16_t n_valid, uint16_t n_invalid)
2428 struct kvm_get_htab_header *buf;
2429 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2432 buf = alloca(chunksize);
2434 buf->n_valid = n_valid;
2435 buf->n_invalid = n_invalid;
2437 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2439 rc = write(fd, buf, chunksize);
2441 fprintf(stderr, "Error writing KVM hash table: %s\n",
2445 if (rc != chunksize) {
2446 /* We should never get a short write on a single chunk */
2447 fprintf(stderr, "Short write, restoring KVM hash table\n");
2453 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2458 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2463 int kvm_arch_on_sigbus(int code, void *addr)
2468 void kvm_arch_init_irq_routing(KVMState *s)
2472 struct kvm_get_htab_buf {
2473 struct kvm_get_htab_header header;
2475 * We require one extra byte for read
2477 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2480 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2483 struct kvm_get_htab_fd ghf;
2484 struct kvm_get_htab_buf *hpte_buf;
2487 ghf.start_index = pte_index;
2488 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2493 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2495 * Read the hpte group
2497 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2502 return (uint64_t)(uintptr_t) hpte_buf->hpte;
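/*
 * Lifetime note: the token returned above is really a pointer into the
 * g_malloc0()'d kvm_get_htab_buf, so every successful call must be paired
 * with kvmppc_hash64_free_pteg() below, which recovers the containing
 * buffer with container_of() and frees it.
 */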
2511 void kvmppc_hash64_free_pteg(uint64_t token)
2513 struct kvm_get_htab_buf *htab_buf;
2515 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2521 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2522 target_ulong pte0, target_ulong pte1)
2525 struct kvm_get_htab_fd ghf;
2526 struct kvm_get_htab_buf hpte_buf;
2529 ghf.start_index = 0; /* Ignored */
2530 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2535 hpte_buf.header.n_valid = 1;
2536 hpte_buf.header.n_invalid = 0;
2537 hpte_buf.header.index = pte_index;
2538 hpte_buf.hpte[0] = pte0;
2539 hpte_buf.hpte[1] = pte1;
2541 * Write the hpte entry.
2542 * CAUTION: write() has the warn_unused_result attribute. Hence we
2543 * need to check the return value, even though we do nothing.
2545 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2557 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2558 uint64_t address, uint32_t data, PCIDevice *dev)
2563 int kvm_arch_msi_data_to_gsi(uint32_t data)
2565 return data & 0xffff;
2568 int kvmppc_enable_hwrng(void)
2570 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2574 return kvmppc_enable_hcall(kvm_state, H_RANDOM);