2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
/* Debug output helper.  Two DPRINTF definitions appear below; presumably
 * these are the debug and no-op arms of an elided #ifdef DEBUG_KVM /
 * #else / #endif block -- TODO confirm against the full file. */
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
/* Second (release) arm; its empty body is elided in this view. */
47 #define DPRINTF(fmt, ...) \
/* Host procfs location of the flattened device tree's CPU nodes. */
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
/* Capabilities the host kernel must provide for PPC KVM to work at all
 * (entries and the KVM_CAP_LAST_INFO terminator are elided in this view). */
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
/* Host KVM capability flags, probed once in kvm_arch_init() and consulted
 * throughout the file.  (cap_hior/cap_epr/cap_papr are assigned in
 * kvm_arch_init() below but their declarations are elided here.) */
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
65 static int cap_one_reg;
67 static int cap_ppc_watchdog;
69 static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can assure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
 */
/* One-shot timer re-armed in kvm_arch_pre_run() to implement the
 * wake-up workaround described above. */
80 static QEMUTimer *idle_timer;
/* idle_timer callback: kick the vCPU out of its halt so any pending
 * (possibly swallowed) interrupt gets re-delivered. */
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
/* Defined later in the file (past this view). */
89 static int kvm_ppc_register_host_cpu_type(void);
/* One-time architecture initialisation: probe the host kernel's PPC KVM
 * capabilities into the cap_* flags above and register a CPU type
 * matching the host CPU. */
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
/* Without level-triggered irq support we rely on the idle_timer
 * re-injection workaround; warn the user up front. */
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
/* Push the guest PVR into KVM's sregs so the kernel models the right CPU.
 * Returns 0 on success or a negative errno from the ioctl. */
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users that they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
/* Read-modify-write: fetch current sregs, patch only the PVR, write back. */
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
/* Configure KVM_CAP_SW_TLB so QEMU's BookE 2.06 MAS-format TLB array
 * (env->tlb.tlbm) is shared directly with the kernel.  Returns 0 on
 * success, non-zero/negative on failure (elided in this view). */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 struct kvm_enable_cap encap = {};
155 unsigned int entries = 0;
158 if (!kvm_enabled() ||
159 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
163 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
/* Describe each TLB's geometry to the kernel and count total entries. */
165 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
166 params.tlb_sizes[i] = booke206_tlb_size(env, i);
167 params.tlb_ways[i] = booke206_tlb_ways(env, i);
168 entries += params.tlb_sizes[i];
171 assert(entries == env->nb_tlb);
172 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
174 env->tlb_dirty = true;
176 cfg.array = (uintptr_t)env->tlb.tlbm;
177 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
/* Fixed mojibake: "&params" had been corrupted to the HTML entity "¶". */
178 cfg.params = (uintptr_t)&params;
179 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
181 encap.cap = KVM_CAP_SW_TLB;
182 encap.args[0] = (uintptr_t)&cfg;
184 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
186 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
187 __func__, strerror(-ret));
191 env->kvm_sw_tlb = true;
196 #if defined(TARGET_PPC64)
/* Synthesize an SMMU info structure for old kernels lacking the
 * KVM_PPC_GET_SMMU_INFO ioctl; fills *info with a conservative guess. */
197 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
198 struct kvm_ppc_smmu_info *info)
200 CPUPPCState *env = &cpu->env;
201 CPUState *cs = CPU(cpu);
203 memset(info, 0, sizeof(*info));
205 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
206 * need to "guess" what the supported page sizes are.
208 * For that to work we make a few assumptions:
210 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
211 * KVM which only supports 4K and 16M pages, but supports them
212 * regardless of the backing store characteristics. We also don't
213 * support 1T segments.
215 * This is safe as if HV KVM ever supports that capability or PR
216 * KVM grows support for more page/segment sizes, those versions
217 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
218 * will not hit this fallback
220 * - Else we are running HV KVM. This means we only support page
221 * sizes that fit in the backing store. Additionally we only
222 * advertise 64K pages if the processor is ARCH 2.06 and we assume
223 * P7 encodings for the SLB and hash table. Here too, we assume
224 * support for any newer processor will mean a kernel that
225 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
228 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
/* PR KVM branch: fixed 4k + 16M page sizes. */
233 /* Standard 4k base page size segment */
234 info->sps[0].page_shift = 12;
235 info->sps[0].slb_enc = 0;
236 info->sps[0].enc[0].page_shift = 12;
237 info->sps[0].enc[0].pte_enc = 0;
239 /* Standard 16M large page size segment */
240 info->sps[1].page_shift = 24;
241 info->sps[1].slb_enc = SLB_VSID_L;
242 info->sps[1].enc[0].page_shift = 24;
243 info->sps[1].enc[0].pte_enc = 0;
/* HV KVM branch below (else arm; brace elided in this view). */
247 /* HV KVM has backing store size restrictions */
248 info->flags = KVM_PPC_PAGE_SIZES_REAL;
250 if (env->mmu_model & POWERPC_MMU_1TSEG) {
251 info->flags |= KVM_PPC_1T_SEGMENTS;
254 if (env->mmu_model == POWERPC_MMU_2_06) {
260 /* Standard 4k base page size segment */
261 info->sps[i].page_shift = 12;
262 info->sps[i].slb_enc = 0;
263 info->sps[i].enc[0].page_shift = 12;
264 info->sps[i].enc[0].pte_enc = 0;
267 /* 64K on MMU 2.06 */
268 if (env->mmu_model == POWERPC_MMU_2_06) {
269 info->sps[i].page_shift = 16;
270 info->sps[i].slb_enc = 0x110;
271 info->sps[i].enc[0].page_shift = 16;
272 info->sps[i].enc[0].pte_enc = 1;
276 /* Standard 16M large page size segment */
277 info->sps[i].page_shift = 24;
278 info->sps[i].slb_enc = SLB_VSID_L;
279 info->sps[i].enc[0].page_shift = 24;
280 info->sps[i].enc[0].pte_enc = 0;
/* Fetch SMMU info from the kernel if the ioctl exists, otherwise fall
 * back to the synthesized guess above. */
284 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
286 CPUState *cs = CPU(cpu);
289 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
290 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
296 kvm_get_fallback_smmu_info(cpu, info);
/* Return the page size backing guest RAM: the normal host page size for
 * anonymous memory, or the hugepage size when -mem-path points at a
 * hugetlbfs mount. */
299 static long getrampagesize(void)
305 /* guest RAM is backed by normal anonymous pages */
306 return getpagesize();
/* Retry statfs() across EINTR. */
310 ret = statfs(mem_path, &fs);
311 } while (ret != 0 && errno == EINTR);
314 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
319 #define HUGETLBFS_MAGIC 0x958458f6
321 if (fs.f_type != HUGETLBFS_MAGIC) {
322 /* Explicit mempath, but it's ordinary pages */
323 return getpagesize();
326 /* It's hugepage, return the huge page size */
/* A page size is usable if KVM imposes no real-page restriction, or the
 * page (1 << shift) fits within the RAM backing page size. */
330 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
332 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
336 return (1ul << shift) <= rampgsize;
/* Filter the kernel-reported segment/page sizes against the RAM backing
 * page size and store the surviving set into env->sps; also updates
 * slb_nr and the 1T-segment MMU flag. */
339 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
341 static struct kvm_ppc_smmu_info smmu_info;
342 static bool has_smmu_info;
343 CPUPPCState *env = &cpu->env;
347 /* We only handle page sizes for 64-bit server guests for now */
348 if (!(env->mmu_model & POWERPC_MMU_64)) {
352 /* Collect MMU info from kernel if not already */
/* Cached in function-local statics: queried from KVM only once. */
353 if (!has_smmu_info) {
354 kvm_get_smmu_info(cpu, &smmu_info);
355 has_smmu_info = true;
358 rampagesize = getrampagesize();
360 /* Convert to QEMU form */
361 memset(&env->sps, 0, sizeof(env->sps));
/* ik/jk index the kernel arrays, iq/jq the (compacted) QEMU arrays. */
363 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
364 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
365 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
367 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 qsps->page_shift = ksps->page_shift;
372 qsps->slb_enc = ksps->slb_enc;
373 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
374 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
375 ksps->enc[jk].page_shift)) {
378 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
379 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
380 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
384 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
388 env->slb_nr = smmu_info.slb_size;
389 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
390 env->mmu_model |= POWERPC_MMU_1TSEG;
392 env->mmu_model &= ~POWERPC_MMU_1TSEG;
395 #else /* defined (TARGET_PPC64) */
/* No-op on 32-bit targets: page-size fixup only applies to 64-bit
 * server MMUs. */
397 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
401 #endif /* !defined (TARGET_PPC64) */
/* Map a QEMU CPU to the vcpu id KVM should use (device-tree CPU id). */
403 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
405 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
/* Per-vCPU initialisation: fix up page sizes, sync sregs, arm the kick
 * timer, and enable shared-TLB support on BookE 2.06 MMUs. */
408 int kvm_arch_init_vcpu(CPUState *cs)
410 PowerPCCPU *cpu = POWERPC_CPU(cs);
411 CPUPPCState *cenv = &cpu->env;
414 /* Gather server mmu info from KVM and update the CPU state */
415 kvm_fixup_page_sizes(cpu);
417 /* Synchronize sregs with kvm */
418 ret = kvm_arch_sync_sregs(cpu);
423 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
425 /* Some targets support access to KVM's guest TLB. */
426 switch (cenv->mmu_model) {
427 case POWERPC_MMU_BOOKE206:
428 ret = kvm_booke206_tlb_init(cpu);
437 void kvm_arch_reset_vcpu(CPUState *cpu)
/* Flush QEMU's software TLB into KVM by marking every entry dirty via
 * KVM_DIRTY_TLB.  No-op unless the shared-TLB setup succeeded. */
441 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
443 CPUPPCState *env = &cpu->env;
444 CPUState *cs = CPU(cpu);
445 struct kvm_dirty_tlb dirty_tlb;
446 unsigned char *bitmap;
449 if (!env->kvm_sw_tlb) {
/* All-ones bitmap: one bit per TLB entry, rounded up to whole bytes. */
453 bitmap = g_malloc((env->nb_tlb + 7) / 8);
454 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
456 dirty_tlb.bitmap = (uintptr_t)bitmap;
457 dirty_tlb.num_dirty = env->nb_tlb;
459 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
461 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
462 __func__, strerror(-ret));
/* Read one SPR from KVM via KVM_GET_ONE_REG into env->spr[spr], sizing
 * the transfer (u32/u64) from the register id.  Errors are only traced. */
468 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
470 PowerPCCPU *cpu = POWERPC_CPU(cs);
471 CPUPPCState *env = &cpu->env;
476 struct kvm_one_reg reg = {
478 .addr = (uintptr_t) &val,
/* Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
482 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
484 trace_kvm_failed_spr_get(spr, strerror(errno));
486 switch (id & KVM_REG_SIZE_MASK) {
487 case KVM_REG_SIZE_U32:
488 env->spr[spr] = val.u32;
491 case KVM_REG_SIZE_U64:
492 env->spr[spr] = val.u64;
496 /* Don't handle this size yet */
/* Write one SPR from env->spr[spr] to KVM via KVM_SET_ONE_REG, sizing the
 * transfer (u32/u64) from the register id.  Errors are only traced. */
502 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
504 PowerPCCPU *cpu = POWERPC_CPU(cs);
505 CPUPPCState *env = &cpu->env;
510 struct kvm_one_reg reg = {
512 .addr = (uintptr_t) &val,
516 switch (id & KVM_REG_SIZE_MASK) {
517 case KVM_REG_SIZE_U32:
518 val.u32 = env->spr[spr];
521 case KVM_REG_SIZE_U64:
522 val.u64 = env->spr[spr];
526 /* Don't handle this size yet */
/* Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
530 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
532 trace_kvm_failed_spr_set(spr, strerror(errno));
/* Push FP/VSX/Altivec state (FPSCR, FPR/VSR pairs, VSCR, VRs) to KVM via
 * ONE_REG.  Fixed mojibake throughout: "&reg" had been corrupted to "®". */
536 static int kvm_put_fp(CPUState *cs)
538 PowerPCCPU *cpu = POWERPC_CPU(cs);
539 CPUPPCState *env = &cpu->env;
540 struct kvm_one_reg reg;
544 if (env->insns_flags & PPC_FLOAT) {
545 uint64_t fpscr = env->fpscr;
546 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
548 reg.id = KVM_REG_PPC_FPSCR;
549 reg.addr = (uintptr_t)&fpscr;
550 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
552 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
/* Each VSR is the FPR (high half) paired with the VSX extension (low). */
556 for (i = 0; i < 32; i++) {
559 vsr[0] = float64_val(env->fpr[i]);
560 vsr[1] = env->vsr[i];
561 reg.addr = (uintptr_t) &vsr;
562 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
564 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
566 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
573 if (env->insns_flags & PPC_ALTIVEC) {
574 reg.id = KVM_REG_PPC_VSCR;
575 reg.addr = (uintptr_t)&env->vscr;
576 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
582 for (i = 0; i < 32; i++) {
583 reg.id = KVM_REG_PPC_VR(i);
584 reg.addr = (uintptr_t)&env->avr[i];
585 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
587 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
/* Pull FP/VSX/Altivec state from KVM via ONE_REG, mirroring kvm_put_fp().
 * Fixed mojibake throughout: "&reg" had been corrupted to "®". */
596 static int kvm_get_fp(CPUState *cs)
598 PowerPCCPU *cpu = POWERPC_CPU(cs);
599 CPUPPCState *env = &cpu->env;
600 struct kvm_one_reg reg;
604 if (env->insns_flags & PPC_FLOAT) {
606 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
608 reg.id = KVM_REG_PPC_FPSCR;
609 reg.addr = (uintptr_t)&fpscr;
610 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
612 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
618 for (i = 0; i < 32; i++) {
621 reg.addr = (uintptr_t) &vsr;
622 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
624 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
626 DPRINTF("Unable to get %s%d from KVM: %s\n",
627 vsx ? "VSR" : "FPR", i, strerror(errno));
/* Split the VSR back into FPR (high) and VSX extension (low) halves. */
630 env->fpr[i] = vsr[0];
632 env->vsr[i] = vsr[1];
638 if (env->insns_flags & PPC_ALTIVEC) {
639 reg.id = KVM_REG_PPC_VSCR;
640 reg.addr = (uintptr_t)&env->vscr;
641 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
647 for (i = 0; i < 32; i++) {
648 reg.id = KVM_REG_PPC_VR(i);
649 reg.addr = (uintptr_t)&env->avr[i];
650 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
652 DPRINTF("Unable to get VR%d from KVM: %s\n",
662 #if defined(TARGET_PPC64)
/* Fetch the PAPR VPA, SLB-shadow and dispatch-trace-log registrations
 * from KVM.  The asserts check the env field layout matches the packed
 * addr+size format the kernel expects for the 128-bit regs.
 * Fixed mojibake throughout: "&reg" had been corrupted to "®". */
663 static int kvm_get_vpa(CPUState *cs)
665 PowerPCCPU *cpu = POWERPC_CPU(cs);
666 CPUPPCState *env = &cpu->env;
667 struct kvm_one_reg reg;
670 reg.id = KVM_REG_PPC_VPA_ADDR;
671 reg.addr = (uintptr_t)&env->vpa_addr;
672 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
674 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
678 assert((uintptr_t)&env->slb_shadow_size
679 == ((uintptr_t)&env->slb_shadow_addr + 8));
680 reg.id = KVM_REG_PPC_VPA_SLB;
681 reg.addr = (uintptr_t)&env->slb_shadow_addr;
682 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
684 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
689 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
690 reg.id = KVM_REG_PPC_VPA_DTL;
691 reg.addr = (uintptr_t)&env->dtl_addr;
692 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
694 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
/* Push VPA/SLB-shadow/DTL registrations to KVM in dependency order (master
 * VPA first when registering, last when deregistering).
 * Fixed mojibake throughout: "&reg" had been corrupted to "®". */
702 static int kvm_put_vpa(CPUState *cs)
704 PowerPCCPU *cpu = POWERPC_CPU(cs);
705 CPUPPCState *env = &cpu->env;
706 struct kvm_one_reg reg;
709 /* SLB shadow or DTL can't be registered unless a master VPA is
710 * registered. That means when restoring state, if a VPA *is*
711 * registered, we need to set that up first. If not, we need to
712 * deregister the others before deregistering the master VPA */
713 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
716 reg.id = KVM_REG_PPC_VPA_ADDR;
717 reg.addr = (uintptr_t)&env->vpa_addr;
718 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
720 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
725 assert((uintptr_t)&env->slb_shadow_size
726 == ((uintptr_t)&env->slb_shadow_addr + 8));
727 reg.id = KVM_REG_PPC_VPA_SLB;
728 reg.addr = (uintptr_t)&env->slb_shadow_addr;
729 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
731 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
735 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
736 reg.id = KVM_REG_PPC_VPA_DTL;
737 reg.addr = (uintptr_t)&env->dtl_addr;
738 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
740 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
/* Deregistration path: clear the master VPA only after the others. */
745 if (!env->vpa_addr) {
746 reg.id = KVM_REG_PPC_VPA_ADDR;
747 reg.addr = (uintptr_t)&env->vpa_addr;
748 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
750 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
757 #endif /* TARGET_PPC64 */
/* Write the full QEMU CPU state (GPRs, CR, SPRs, sregs, ONE_REG SPRs,
 * VPA) into KVM.  `level` gates the expensive reset-only state.
 * Fixed mojibake: "&regs" had been corrupted to "®s" (twice). */
759 int kvm_arch_put_registers(CPUState *cs, int level)
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
774 regs.xer = cpu_read_xer(env);
778 regs.srr0 = env->spr[SPR_SRR0];
779 regs.srr1 = env->spr[SPR_SRR1];
781 regs.sprg0 = env->spr[SPR_SPRG0];
782 regs.sprg1 = env->spr[SPR_SPRG1];
783 regs.sprg2 = env->spr[SPR_SPRG2];
784 regs.sprg3 = env->spr[SPR_SPRG3];
785 regs.sprg4 = env->spr[SPR_SPRG4];
786 regs.sprg5 = env->spr[SPR_SPRG5];
787 regs.sprg6 = env->spr[SPR_SPRG6];
788 regs.sprg7 = env->spr[SPR_SPRG7];
790 regs.pid = env->spr[SPR_BOOKE_PID];
792 for (i = 0;i < 32; i++)
793 regs.gpr[i] = env->gpr[i];
/* Pack the 8 CR fields into the single 32-bit CR register, crf[0] high. */
796 for (i = 0; i < 8; i++) {
797 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
800 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
806 if (env->tlb_dirty) {
808 env->tlb_dirty = false;
811 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
812 struct kvm_sregs sregs;
814 sregs.pvr = env->spr[SPR_PVR];
816 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
/* 64-bit: valid SLB entries carry their index in the low esid bits. */
820 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
821 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
822 if (env->slb[i].esid & SLB_ESID_V) {
823 sregs.u.s.ppc64.slb[i].slbe |= i;
825 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
830 for (i = 0; i < 16; i++) {
831 sregs.u.s.ppc32.sr[i] = env->sr[i];
835 for (i = 0; i < 8; i++) {
836 /* Beware. We have to swap upper and lower bits here */
837 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
839 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
843 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
849 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
850 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
856 /* We deliberately ignore errors here, for kernels which have
857 * the ONE_REG calls, but don't support the specific
858 * registers, there's a reasonable chance things will still
859 * work, at least until we try to migrate. */
860 for (i = 0; i < 1024; i++) {
861 uint64_t id = env->spr_cb[i].one_reg_id;
864 kvm_put_one_spr(cs, id, i);
870 if (kvm_put_vpa(cs) < 0) {
871 DPRINTF("Warning: Unable to set VPA information to KVM\n");
874 #endif /* TARGET_PPC64 */
/* Read the full CPU state back from KVM into QEMU's env: regs, then
 * BookE or BookS sregs, then ONE_REG SPRs and (PPC64) the VPA state.
 * Fixed mojibake: "&regs" had been corrupted to "®s". */
880 int kvm_arch_get_registers(CPUState *cs)
882 PowerPCCPU *cpu = POWERPC_CPU(cs);
883 CPUPPCState *env = &cpu->env;
884 struct kvm_regs regs;
885 struct kvm_sregs sregs;
889 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
/* Unpack the 32-bit CR into the 8 CR fields, consuming low nibbles first. */
894 for (i = 7; i >= 0; i--) {
895 env->crf[i] = cr & 15;
901 cpu_write_xer(env, regs.xer);
905 env->spr[SPR_SRR0] = regs.srr0;
906 env->spr[SPR_SRR1] = regs.srr1;
908 env->spr[SPR_SPRG0] = regs.sprg0;
909 env->spr[SPR_SPRG1] = regs.sprg1;
910 env->spr[SPR_SPRG2] = regs.sprg2;
911 env->spr[SPR_SPRG3] = regs.sprg3;
912 env->spr[SPR_SPRG4] = regs.sprg4;
913 env->spr[SPR_SPRG5] = regs.sprg5;
914 env->spr[SPR_SPRG6] = regs.sprg6;
915 env->spr[SPR_SPRG7] = regs.sprg7;
917 env->spr[SPR_BOOKE_PID] = regs.pid;
919 for (i = 0;i < 32; i++)
920 env->gpr[i] = regs.gpr[i];
/* BookE path: decode the feature-flagged sregs union. */
924 if (cap_booke_sregs) {
925 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
930 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
931 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
932 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
933 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
934 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
935 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
936 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
937 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
938 env->spr[SPR_DECR] = sregs.u.e.dec;
939 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
940 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
941 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
944 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
945 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
946 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
947 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
948 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
949 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
952 if (sregs.u.e.features & KVM_SREGS_E_64) {
953 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
956 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
957 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
960 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
961 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
962 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
963 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
964 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
965 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
966 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
967 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
968 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
969 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
970 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
971 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
972 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
973 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
974 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
975 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
976 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
978 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
979 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
980 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
981 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
984 if (sregs.u.e.features & KVM_SREGS_E_PM) {
985 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
988 if (sregs.u.e.features & KVM_SREGS_E_PC) {
989 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
990 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
994 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
995 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
996 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
997 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
998 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
999 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1000 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1001 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1002 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1003 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1004 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1007 if (sregs.u.e.features & KVM_SREGS_EXP) {
1008 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1011 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1012 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1013 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1016 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1017 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1018 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1019 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1021 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1022 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1023 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
/* BookS path (cap_segstate; branch header elided in this view). */
1029 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1034 if (!env->external_htab) {
1035 ppc_store_sdr1(env, sregs.u.s.sdr1);
1041 * The packed SLB array we get from KVM_GET_SREGS only contains
1042 * information about valid entries. So we flush our internal
1043 * copy to get rid of stale ones, then put all valid SLB entries
1046 memset(env->slb, 0, sizeof(env->slb));
1047 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1048 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1049 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1051 * Only restore valid entries
1053 if (rb & SLB_ESID_V) {
1054 ppc_store_slb(env, rb, rs);
1060 for (i = 0; i < 16; i++) {
1061 env->sr[i] = sregs.u.s.ppc32.sr[i];
/* BATs: KVM packs upper/lower halves into one 64-bit value each. */
1065 for (i = 0; i < 8; i++) {
1066 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1067 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1068 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1069 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1074 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1080 /* We deliberately ignore errors here, for kernels which have
1081 * the ONE_REG calls, but don't support the specific
1082 * registers, there's a reasonable chance things will still
1083 * work, at least until we try to migrate. */
1084 for (i = 0; i < 1024; i++) {
1085 uint64_t id = env->spr_cb[i].one_reg_id;
1088 kvm_get_one_spr(cs, id, i);
1094 if (kvm_get_vpa(cs) < 0) {
1095 DPRINTF("Warning: Unable to get VPA information from KVM\n");
/* Raise or lower the external interrupt line on a vCPU via KVM_INTERRUPT.
 * Only PPC_INTERRUPT_EXT is handled, and only when the kernel supports
 * both the level and unset irq capabilities. */
1104 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1106 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1108 if (irq != PPC_INTERRUPT_EXT) {
1112 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1116 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
/* Select which core input pin carries the external interrupt for the
 * legacy (non-level-irq) injection path in kvm_arch_pre_run(). */
1121 #if defined(TARGET_PPCEMB)
1122 #define PPC_INPUT_INT PPC40x_INPUT_INT
1123 #elif defined(TARGET_PPC64)
1124 #define PPC_INPUT_INT PPC970_INPUT_INT
1126 #define PPC_INPUT_INT PPC6xx_INPUT_INT
/* Pre-run hook: on kernels without level-irq support, manually inject a
 * pending external interrupt and arm the 20 ms re-kick timer. */
1129 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1131 PowerPCCPU *cpu = POWERPC_CPU(cs);
1132 CPUPPCState *env = &cpu->env;
1136 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1137 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1138 if (!cap_interrupt_level &&
1139 run->ready_for_interrupt_injection &&
1140 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1141 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1143 /* For now KVM disregards the 'irq' argument. However, in the
1144 * future KVM could cache it in-kernel to avoid a heavyweight exit
1145 * when reading the UIC.
1147 irq = KVM_INTERRUPT_SET;
1149 DPRINTF("injected interrupt %d\n", irq);
1150 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1152 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1155 /* Always wake up soon in case the interrupt was level based */
1156 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1157 (get_ticks_per_sec() / 50));
1160 /* We don't know if there are more interrupts pending after this. However,
1161 * the guest will return to userspace in the course of handling this one
1162 * anyways, so we will get a chance to deliver the rest. */
1165 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1169 int kvm_arch_process_async_events(CPUState *cs)
/* Handle a KVM halt exit: if no interrupt is pending and external
 * interrupts are enabled (MSR[EE]), put the vCPU into the halted state. */
1174 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1176 CPUState *cs = CPU(cpu);
1177 CPUPPCState *env = &cpu->env;
1179 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1181 cs->exception_index = EXCP_HLT;
1187 /* map dcr access to existing qemu dcr emulation */
/* Forward a guest DCR read to QEMU's DCR emulation; unhandled DCR numbers
 * are only logged, not fatal. */
1188 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1190 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1191 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
/* Forward a guest DCR write to QEMU's DCR emulation; unhandled DCR numbers
 * are only logged, not fatal. */
1196 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1198 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1199 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
/* Dispatch a KVM exit: DCR access, halt, PAPR hypercall (PPC64), EPR
 * (MPIC interrupt acknowledge), watchdog expiry, or unknown. */
1204 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1206 PowerPCCPU *cpu = POWERPC_CPU(cs);
1207 CPUPPCState *env = &cpu->env;
1210 switch (run->exit_reason) {
1212 if (run->dcr.is_write) {
1213 DPRINTF("handle dcr write\n");
1214 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1216 DPRINTF("handle dcr read\n");
1217 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1221 DPRINTF("handle halt\n");
1222 ret = kvmppc_handle_halt(cpu);
1224 #if defined(TARGET_PPC64)
1225 case KVM_EXIT_PAPR_HCALL:
1226 DPRINTF("handle PAPR hypercall\n");
1227 run->papr_hcall.ret = spapr_hypercall(cpu,
1229 run->papr_hcall.args);
/* KVM_EXIT_EPR: answer the interrupt-acknowledge cycle from the MPIC. */
1234 DPRINTF("handle epr\n");
1235 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1238 case KVM_EXIT_WATCHDOG:
1239 DPRINTF("handle watchdog expiry\n");
1240 watchdog_perform_action();
1245 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
/* OR the given bits into the guest TSR via the KVM_REG_PPC_OR_TSR ONE_REG.
 * Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
1253 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1255 CPUState *cs = CPU(cpu);
1256 uint32_t bits = tsr_bits;
1257 struct kvm_one_reg reg = {
1258 .id = KVM_REG_PPC_OR_TSR,
1259 .addr = (uintptr_t) &bits,
1262 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
/* Clear the given bits in the guest TSR via KVM_REG_PPC_CLEAR_TSR.
 * Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
1265 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1268 CPUState *cs = CPU(cpu);
1269 uint32_t bits = tsr_bits;
1270 struct kvm_one_reg reg = {
1271 .id = KVM_REG_PPC_CLEAR_TSR,
1272 .addr = (uintptr_t) &bits,
1275 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
/* Push the guest's BookE TCR to KVM via KVM_REG_PPC_TCR.
 * Fixed mojibake: "&reg" had been corrupted to the HTML entity "®". */
1278 int kvmppc_set_tcr(PowerPCCPU *cpu)
1280 CPUState *cs = CPU(cpu);
1281 CPUPPCState *env = &cpu->env;
1282 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1284 struct kvm_one_reg reg = {
1285 .id = KVM_REG_PPC_TCR,
1286 .addr = (uintptr_t) &tcr,
1289 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
/* Enable the in-kernel BookE watchdog for this vCPU via KVM_ENABLE_CAP. */
1292 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1294 CPUState *cs = CPU(cpu);
1295 struct kvm_enable_cap encap = {};
1298 if (!kvm_enabled()) {
1302 if (!cap_ppc_watchdog) {
/* NOTE(review): message lacks a trailing '\n' -- confirm intended. */
1303 printf("warning: KVM does not support watchdog");
1307 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1308 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1310 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1311 __func__, strerror(-ret));
/* Scan /proc/cpuinfo for a line starting with `field` and copy it (the
 * whole line) into `value` (at most `len` bytes).  Return value semantics
 * elided in this view -- presumably non-zero on success. */
1318 static int read_cpuinfo(const char *field, char *value, int len)
1322 int field_len = strlen(field);
1325 f = fopen("/proc/cpuinfo", "r");
1331 if(!fgets(line, sizeof(line), f)) {
1334 if (!strncmp(line, field, field_len)) {
1335 pstrcpy(value, len, line);
/* Host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo; falls back to get_ticks_per_sec() if unavailable. */
1346 uint32_t kvmppc_get_tbfreq(void)
1350 uint32_t retval = get_ticks_per_sec();
1352 if (read_cpuinfo("timebase", line, sizeof(line))) {
/* Value follows the ':' separator on the cpuinfo line. */
1356 if (!(ns = strchr(line, ':'))) {
1366 /* Try to find a device tree node for a CPU with clock-frequency property */
/* On success `buf` holds the path of the first CPU node that has a
 * clock-frequency property; failure leaves buf empty and is reported. */
1367 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1369 struct dirent *dirp;
1372 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1373 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1378 while ((dirp = readdir(dp)) != NULL) {
/* Probe <node>/clock-frequency; if present, keep the node path in buf. */
1380 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1382 f = fopen(buf, "r");
1384 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1391 if (buf[0] == '\0') {
1392 printf("Unknown host!\n");
1399 /* Read a CPU node property from the host device tree that's a single
1400 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1401 * (can't find or open the property, or doesn't understand the
1403 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1413 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
/* Fixed off-by-one: strncat's size argument is the max number of
 * characters appended EXCLUDING the terminating NUL, so the available
 * space is sizeof(buf) - strlen(buf) - 1, not sizeof(buf) - strlen(buf)
 * (the original could write one byte past the buffer). */
1417 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1418 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1420 f = fopen(buf, "rb");
1425 len = fread(&u, 1, sizeof(u), f);
/* Device-tree properties are big-endian; width decided by bytes read. */
1429 /* property is a 32-bit quantity */
1430 return be32_to_cpu(u.v32);
1432 return be64_to_cpu(u.v64);
/* Host CPU clock frequency from the device tree (0 on failure). */
1438 uint64_t kvmppc_get_clockfreq(void)
1440 return kvmppc_read_int_cpu_dt("clock-frequency");
/* Host "ibm,vmx" (Altivec/VSX level) property from the device tree. */
1443 uint32_t kvmppc_get_vmx(void)
1445 return kvmppc_read_int_cpu_dt("ibm,vmx");
/* Host "ibm,dfp" (decimal FP support) property from the device tree. */
1448 uint32_t kvmppc_get_dfp(void)
1450 return kvmppc_read_int_cpu_dt("ibm,dfp");
/* Fetch the paravirt info (hypercall instructions, idle flag) from KVM;
 * requires KVM_CAP_PPC_GET_PVINFO.  Return value elided in this view. */
1453 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1455 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1456 CPUState *cs = CPU(cpu);
1458 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1459 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
/* Whether the host KVM advertises the paravirt EV_IDLE hypercall. */
1466 int kvmppc_get_hasidle(CPUPPCState *env)
1468 struct kvm_ppc_pvinfo pvinfo;
1470 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1471 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
/* Copy the KVM hypercall instruction sequence into buf (buf_len bytes).
 * Falls back to an "always fail" sequence when pvinfo is unavailable. */
1478 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1480 uint32_t *hc = (uint32_t*)buf;
1481 struct kvm_ppc_pvinfo pvinfo;
1483 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
/* NOTE(review): copies buf_len bytes unconditionally -- assumes
 * callers never pass a length larger than sizeof(pvinfo.hcall);
 * confirm against the callers. */
1484 memcpy(buf, pvinfo.hcall, buf_len);
1489 * Fallback to always fail hypercalls:
/* Switch the vcpu into PAPR (sPAPR pseries guest) mode via
 * KVM_ENABLE_CAP; fatal when the kernel lacks KVM_CAP_PPC_PAPR. */
1505 void kvmppc_set_papr(PowerPCCPU *cpu)
1507 CPUState *cs = CPU(cpu);
1508 struct kvm_enable_cap cap = {};
1511 cap.cap = KVM_CAP_PPC_PAPR;
1512 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
/* PAPR mode is mandatory for pseries guests -- abort rather than limp on */
1515 cpu_abort(cs, "This KVM version does not support PAPR\n");
1518 /* Update the capability flag so we sync the right information
/* Enable or disable the in-kernel MPIC EPR (external interrupt proxy)
 * facility for this vcpu.  Only fatal when *enabling* fails: asking an
 * old kernel to disable a facility it never had is harmless. */
1523 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1525 CPUState *cs = CPU(cpu);
1526 struct kvm_enable_cap cap = {};
1529 cap.cap = KVM_CAP_PPC_EPR;
1530 cap.args[0] = mpic_proxy;
1531 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1533 if (ret && mpic_proxy) {
1534 cpu_abort(cs, "This KVM version does not support EPR\n");
/* Hardware threads per core as reported by KVM (cap_ppc_smt); defaults
 * to 1 when the capability is absent. */
1538 int kvmppc_smt_threads(void)
1540 return cap_ppc_smt ? cap_ppc_smt : 1;
/* Allocate a contiguous Real Mode Area through KVM_ALLOCATE_RMA, mmap
 * it into QEMU and register it as RAM at guest physical address 0 in
 * sysmem.  Returns 0 when contiguous allocation is unnecessary
 * (cap_ppc_rma < 2); other return paths are elided from this view --
 * presumably the size mapped on success. */
1544 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1549 struct kvm_allocate_rma ret;
1550 MemoryRegion *rma_region;
1552 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1553  * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1554  * not necessary on this hardware
1555  * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1557  * FIXME: We should allow the user to force contiguous RMA
1558  * allocation in the cap_ppc_rma==1 case.
1560 if (cap_ppc_rma < 2) {
/* The ioctl yields an fd that the RMA memory is mmap()ed through */
1564 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1566 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
/* Cap the mapping at 256 MiB even if KVM offered a larger RMA */
1571 size = MIN(ret.rma_size, 256ul << 20);
1573 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1574 if (rma == MAP_FAILED) {
1575 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
/* Wrap the mapping in a RAM MemoryRegion and place it at address 0 */
1579 rma_region = g_new(MemoryRegion, 1);
1580 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1581 vmstate_register_ram_global(rma_region);
1582 memory_region_add_subregion(sysmem, 0, rma_region);
/* Clamp current_size to the largest RMA usable given the host MMU's
 * supported page sizes, the backing page size of guest RAM, and the
 * hash table size (hash_shift).  With cap_ppc_rma >= 2 the kernel
 * manages this itself, so the requested size passes through. */
1587 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1589 struct kvm_ppc_smmu_info info;
1590 long rampagesize, best_page_shift;
1593 if (cap_ppc_rma >= 2) {
1594 return current_size;
1597 /* Find the largest hardware supported page size that's less than
1598  * or equal to the (logical) backing page size of guest RAM */
1599 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1600 rampagesize = getrampagesize();
1601 best_page_shift = 0;
1603 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1604 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
/* Unused table slots have page_shift == 0 */
1606 if (!sps->page_shift) {
1610 if ((sps->page_shift > best_page_shift)
1611 && ((1UL << sps->page_shift) <= rampagesize)) {
1612 best_page_shift = sps->page_shift;
/* Limit: 2^(page_shift + hash_shift - 7).  NOTE(review): formula
 * taken verbatim from the expression below -- confirm the
 * architectural derivation against the ISA / sPAPR documents. */
1616 return MIN(current_size,
1617 1ULL << (best_page_shift + hash_shift - 7));
/* Create an in-kernel TCE (DMA translation) table for liobn and mmap
 * it into QEMU.  On success the mapped table is returned and *pfd
 * holds the backing fd; *pfd stays -1 when no kernel table exists so
 * the teardown path knows not to munmap. */
1621 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1623 struct kvm_create_spapr_tce args = {
1625 .window_size = window_size,
1631 /* Must set fd to -1 so we don't try to munmap when called for
1632  * destroying the table, which the upper layers -will- do
1635 if (!cap_spapr_tce) {
1639 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1641 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
/* One 64-bit TCE entry per IOMMU page in the DMA window */
1646 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1647 /* FIXME: round this up to page size */
1649 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1650 if (table == MAP_FAILED) {
1651 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
/* Tear down a TCE table created by kvmppc_create_spapr_tce(): unmap
 * the table and (per the condition elided at 1670/1671, presumably)
 * close the fd.  On failure the table is deliberately leaked rather
 * than left half-destroyed. */
1661 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
/* Recompute the mapping length the same way the create path did */
1669 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1670 if ((munmap(table, len) < 0) ||
1672 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1674 /* Leak the table */
/* Ask the kernel to allocate/reset the guest hash page table with the
 * suggested size (shift_hint).  Returns 0 when QEMU must allocate the
 * htab itself (full emulation, or PR KVM); otherwise the shift the
 * kernel actually used. */
1680 int kvmppc_reset_htab(int shift_hint)
1682 uint32_t shift = shift_hint;
1684 if (!kvm_enabled()) {
1685 /* Full emulation, tell caller to allocate htab itself */
1688 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
/* The ioctl updates 'shift' in place to the size it granted */
1690 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1691 if (ret == -ENOTTY) {
1692 /* At least some versions of PR KVM advertise the
1693  * capability, but don't implement the ioctl(). Oops.
1694  * Return 0 so that we allocate the htab in qemu, as is
1695  * correct for PR. */
1697 } else if (ret < 0) {
1703 /* We have a kernel that predates the htab reset calls. For PR
1704  * KVM, we need to allocate the htab ourselves, for an HV KVM of
1705  * this era, it has allocated a 16MB fixed size hash table
1706  * already. Kernels of this era have the GET_PVINFO capability
1707  * only on PR, so we use this hack to determine the right
1709 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1710 /* PR - tell caller to allocate htab */
1713 /* HV - assume 16MB kernel allocated htab */
/* Read the host's Processor Version Register (body elided here). */
1718 static inline uint32_t mfpvr(void)
/* Set or clear the given flag bits in *word according to 'on'
 * (body elided here). */
1727 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
/* Instance init for the "host" CPU QOM type -- this type only exists
 * when running under KVM, hence the assertion. */
1736 static void kvmppc_host_cpu_initfn(Object *obj)
1738 assert(kvm_enabled());
/* Class init for the "host" CPU type: patch the CPU class with what
 * the host actually supports -- Altivec/VSX, DFP and L1 cache sizes --
 * as read from the host device tree. */
1742 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1743 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1744 uint32_t vmx = kvmppc_get_vmx();
1745 uint32_t dfp = kvmppc_get_dfp();
1746 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1747 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1749 /* Now fix up the class with information we can query from the host */
1753 /* Only override when we know what the host supports */
/* ibm,vmx level: > 0 means Altivec, > 1 additionally means VSX */
1754 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1755 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1758 /* Only override when we know what the host supports */
1759 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
/* NOTE(review): kvmppc_read_int_cpu_dt is documented above to return
 * 0 (not -1) on failure, so these != -1 guards accept a failed read
 * and would set a cache size of 0 -- confirm and reconcile (either
 * check != 0 here or make the helper return -1 on error). */
1762 if (dcache_size != -1) {
1763 pcc->l1_dcache_size = dcache_size;
1766 if (icache_size != -1) {
1767 pcc->l1_icache_size = icache_size;
/* True when KVM supports the EPR (MPIC external proxy) capability
 * (body elided here -- presumably returns cap_epr). */
1771 bool kvmppc_has_cap_epr(void)
/* True when KVM can expose the hash table via a file descriptor
 * (body elided here -- presumably returns cap_htab_fd). */
1776 bool kvmppc_has_cap_htab_fd(void)
/* Register the "host" CPU QOM type, parented on the CPU class whose
 * PVR matches the host processor -- exact match first, then a masked
 * lookup for unknown revisions. */
1781 static int kvm_ppc_register_host_cpu_type(void)
1783 TypeInfo type_info = {
1784 .name = TYPE_HOST_POWERPC_CPU,
1785 .instance_init = kvmppc_host_cpu_initfn,
1786 .class_init = kvmppc_host_cpu_class_init,
1788 uint32_t host_pvr = mfpvr();
1789 PowerPCCPUClass *pvr_pcc;
1791 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1792 if (pvr_pcc == NULL) {
/* No exact PVR match: fall back to a masked (family) lookup */
1793 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1795 if (pvr_pcc == NULL) {
1798 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1799 type_register(&type_info);
/* Bind an RTAS token to a function name that KVM handles in-kernel.
 * Bails out (return value elided at 1809/1810) when the kernel lacks
 * KVM_CAP_PPC_RTAS. */
1803 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1805 struct kvm_rtas_token_args args = {
1809 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
/* NOTE(review): strncpy leaves args.name unterminated when 'function'
 * is exactly sizeof(args.name) bytes -- presumably RTAS names are
 * always shorter; confirm, or bound the copy at size - 1. */
1813 strncpy(args.name, function, sizeof(args.name));
1815 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
/* Obtain a file descriptor for streaming the guest hash table:
 * read-only for migration save, write for migration load. */
1818 int kvmppc_get_htab_fd(bool write)
1820 struct kvm_get_htab_fd s = {
1821 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1826 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
/* On success the ioctl's return value is the fd itself */
1830 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
/* Stream hash table data from the kernel htab fd into the migration
 * stream, stopping after roughly max_ns of wall-clock time or at EOF.
 * Returns 1 when the whole table has been read (rc == 0), 0 when more
 * remains; error paths are elided from this view. */
1833 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1835 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
/* VLA sized by the caller -- assumed modest (a migration buffer);
 * a huge bufsize would risk stack overflow.  TODO confirm callers. */
1836 uint8_t buf[bufsize];
1840 rc = read(fd, buf, bufsize);
1842 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1846 /* Kernel already returns data in BE format for the file */
1847 qemu_put_buffer(f, buf, rc);
/* Loop continues while data remains and the time budget is unspent */
1851 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1853 return (rc == 0) ? 1 : 0;
/* Write one hash-table chunk (header plus n_valid HPTEs, invalidating
 * n_invalid entries) from the migration stream into the kernel via the
 * htab fd.  Success path returns 0; failure returns are elided here. */
1856 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1857 uint16_t n_valid, uint16_t n_invalid)
1859 struct kvm_get_htab_header *buf;
1860 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
/* Chunk assembled on the stack; size is bounded by the uint16_t
 * n_valid, but alloca of a stream-derived size is worth auditing. */
1863 buf = alloca(chunksize);
1864 /* This is KVM on ppc, so this is all big-endian */
1866 buf->n_valid = n_valid;
1867 buf->n_invalid = n_invalid;
/* HPTE payload follows the header, copied straight from the stream */
1869 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
/* write() carries warn_unused_result -- rc must be checked */
1871 rc = write(fd, buf, chunksize);
1873 fprintf(stderr, "Error writing KVM hash table: %s\n",
1877 if (rc != chunksize) {
1878 /* We should never get a short write on a single chunk */
1879 fprintf(stderr, "Short write, restoring KVM hash table\n");
/* Generic KVM arch hooks for PPC.  Bodies are elided from this view --
 * the breakpoint/debug hooks below are presumably unimplemented stubs
 * on this target; confirm against the full file. */
1885 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1890 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1895 int kvm_arch_on_sigbus(int code, void *addr)
1900 void kvm_arch_init_irq_routing(KVMState *s)
/* Guest-debug hooks: SW/HW breakpoint insert/remove and debug state */
1904 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1909 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1914 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1919 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1924 void kvm_arch_remove_all_hw_breakpoints(void)
1928 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
/* Read buffer for one HPTE group fetched through the htab fd: the KVM
 * chunk header followed by the raw PTE words. */
1932 struct kvm_get_htab_buf {
1933 struct kvm_get_htab_header header;
1935 * We require one extra byte for read
1937 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
/* Read the HPTE group containing pte_index from the kernel and return
 * it as an opaque token -- the address of the hpte payload inside a
 * heap-allocated kvm_get_htab_buf.  The caller must release the token
 * with kvmppc_hash64_free_pteg(). */
1940 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1943 struct kvm_get_htab_fd ghf;
1944 struct kvm_get_htab_buf *hpte_buf;
/* Position the htab fd at the requested PTE group */
1947 ghf.start_index = pte_index;
1948 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1953 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1955 * Read the hpte group
1957 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
/* Token points at the PTE payload, not at the whole buffer */
1962 return (uint64_t)(uintptr_t) hpte_buf->hpte;
/* Release a token from kvmppc_hash64_read_pteg(): recover the
 * enclosing kvm_get_htab_buf from the hpte pointer via container_of
 * and free it. */
1971 void kvmppc_hash64_free_pteg(uint64_t token)
1973 struct kvm_get_htab_buf *htab_buf;
1975 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1981 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1982 target_ulong pte0, target_ulong pte1)
1985 struct kvm_get_htab_fd ghf;
1986 struct kvm_get_htab_buf hpte_buf;
1989 ghf.start_index = 0; /* Ignored */
1990 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1995 hpte_buf.header.n_valid = 1;
1996 hpte_buf.header.n_invalid = 0;
1997 hpte_buf.header.index = pte_index;
1998 hpte_buf.hpte[0] = pte0;
1999 hpte_buf.hpte[1] = pte1;
2001 * Write the hpte entry.
2002 * CAUTION: write() has the warn_unused_result attribute. Hence we
2003 * need to check the return value, even though we do nothing.
2005 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {