target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <[email protected]>
   9  *  Christian Ehrhardt <[email protected]>
  10  *  Hollis Blanchard <[email protected]>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "qemu/timer.h"
  28 #include "sysemu/sysemu.h"
  29 #include "sysemu/kvm.h"
  30 #include "sysemu/numa.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/spapr_cpu_core.h"
  40 #include "hw/ppc/ppc.h"
  41 #include "sysemu/watchdog.h"
  42 #include "trace.h"
  43 #include "exec/gdbstub.h"
  44 #include "exec/memattrs.h"
  45 #include "sysemu/hostmem.h"
  46 #include "qemu/cutils.h"
  47 #if defined(TARGET_PPC64)
  48 #include "hw/ppc/spapr_cpu_core.h"
  49 #endif
  50
  51 //#define DEBUG_KVM
  52
  53 #ifdef DEBUG_KVM
  54 #define DPRINTF(fmt, ...) \
  55     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  56 #else
  57 #define DPRINTF(fmt, ...) \
  58     do { } while (0)
  59 #endif
  60
  61 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  62
  63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  64     KVM_CAP_LAST_INFO
  65 };
  66
  67 static int cap_interrupt_unset = false;
  68 static int cap_interrupt_level = false;
  69 static int cap_segstate;
  70 static int cap_booke_sregs;
  71 static int cap_ppc_smt;
  72 static int cap_ppc_rma;
  73 static int cap_spapr_tce;
  74 static int cap_spapr_multitce;
  75 static int cap_spapr_vfio;
  76 static int cap_hior;
  77 static int cap_one_reg;
  78 static int cap_epr;
  79 static int cap_ppc_watchdog;
  80 static int cap_papr;
  81 static int cap_htab_fd;
  82 static int cap_fixup_hcalls;
  83 static int cap_htm;             /* Hardware transactional memory support */
  84
  85 static uint32_t debug_inst_opcode;
  86
  87 /* XXX We have a race condition where we actually have a level triggered
  88  *     interrupt, but the infrastructure can't expose that yet, so the guest
  89  *     takes but ignores it, goes to sleep and never gets notified that there's
  90  *     still an interrupt pending.
  91  *
  92  *     As a quick workaround, let's just wake up again 20 ms after we injected
  93  *     an interrupt. That way we can assure that we're always reinjecting
  94  *     interrupts in case the guest swallowed them.
  95  */
  96 static QEMUTimer *idle_timer;
  97
  98 static void kvm_kick_cpu(void *opaque)
  99 {
 100     PowerPCCPU *cpu = opaque;
 101
 102     qemu_cpu_kick(CPU(cpu));
 103 }
 104
 105 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 106  * should only be used for fallback tests - generally we should use
 107  * explicit capabilities for the features we want, rather than
 108  * assuming what is/isn't available depending on the KVM variant. */
 109 static bool kvmppc_is_pr(KVMState *ks)
 110 {
 111     /* Assume KVM-PR if the GET_PVINFO capability is available */
 112     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 113 }
 114
 115 static int kvm_ppc_register_host_cpu_type(void);
 116
 117 int kvm_arch_init(MachineState *ms, KVMState *s)
 118 {
 119     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 120     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 121     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 122     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 123     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 124     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 125     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 126     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 127     cap_spapr_vfio = false;
 128     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 129     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 130     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 131     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 132     /* Note: we don't set cap_papr here, because this capability is
 133      * only activated after this by kvmppc_set_papr() */
 134     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 135     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 136     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 137
 138     if (!cap_interrupt_level) {
 139         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 140                         "VM to stall at times!\n");
 141     }
 142
 143     kvm_ppc_register_host_cpu_type();
 144
 145     return 0;
 146 }
 147
 148 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 149 {
 150     CPUPPCState *cenv = &cpu->env;
 151     CPUState *cs = CPU(cpu);
 152     struct kvm_sregs sregs;
 153     int ret;
 154
 155     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 156         /* What we're really trying to say is "if we're on BookE, we use
 157            the native PVR for now". This is the only sane way to check
 158            it though, so we potentially confuse users that they can run
 159            BookE guests on BookS. Let's hope nobody dares enough :) */
 160         return 0;
 161     } else {
 162         if (!cap_segstate) {
 163             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 164             return -ENOSYS;
 165         }
 166     }
 167
 168     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 169     if (ret) {
 170         return ret;
 171     }
 172
 173     sregs.pvr = cenv->spr[SPR_PVR];
 174     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 175 }
 176
 177 /* Set up a shared TLB array with KVM */
 178 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 179 {
 180     CPUPPCState *env = &cpu->env;
 181     CPUState *cs = CPU(cpu);
 182     struct kvm_book3e_206_tlb_params params = {};
 183     struct kvm_config_tlb cfg = {};
 184     unsigned int entries = 0;
 185     int ret, i;
 186
 187     if (!kvm_enabled() ||
 188         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 189         return 0;
 190     }
 191
 192     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 193
 194     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 195         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 196         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 197         entries += params.tlb_sizes[i];
 198     }
 199
 200     assert(entries == env->nb_tlb);
 201     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 202
 203     env->tlb_dirty = true;
 204
 205     cfg.array = (uintptr_t)env->tlb.tlbm;
 206     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 207     cfg.params = (uintptr_t)&params;
 208     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 209
 210     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 211     if (ret < 0) {
 212         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 213                 __func__, strerror(-ret));
 214         return ret;
 215     }
 216
 217     env->kvm_sw_tlb = true;
 218     return 0;
 219 }
 220
 221
 222 #if defined(TARGET_PPC64)
 223 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 224                                        struct kvm_ppc_smmu_info *info)
 225 {
 226     CPUPPCState *env = &cpu->env;
 227     CPUState *cs = CPU(cpu);
 228
 229     memset(info, 0, sizeof(*info));
 230
 231     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 232      * need to "guess" what the supported page sizes are.
 233      *
 234      * For that to work we make a few assumptions:
 235      *
 236      * - Check whether we are running "PR" KVM which only supports 4K
 237      *   and 16M pages, but supports them regardless of the backing
 238      *   store characteritics. We also don't support 1T segments.
 239      *
 240      *   This is safe as if HV KVM ever supports that capability or PR
 241      *   KVM grows supports for more page/segment sizes, those versions
 242      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 243      *   will not hit this fallback
 244      *
 245      * - Else we are running HV KVM. This means we only support page
 246      *   sizes that fit in the backing store. Additionally we only
 247      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 248      *   P7 encodings for the SLB and hash table. Here too, we assume
 249      *   support for any newer processor will mean a kernel that
 250      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 251      *   this fallback.
 252      */
 253     if (kvmppc_is_pr(cs->kvm_state)) {
 254         /* No flags */
 255         info->flags = 0;
 256         info->slb_size = 64;
 257
 258         /* Standard 4k base page size segment */
 259         info->sps[0].page_shift = 12;
 260         info->sps[0].slb_enc = 0;
 261         info->sps[0].enc[0].page_shift = 12;
 262         info->sps[0].enc[0].pte_enc = 0;
 263
 264         /* Standard 16M large page size segment */
 265         info->sps[1].page_shift = 24;
 266         info->sps[1].slb_enc = SLB_VSID_L;
 267         info->sps[1].enc[0].page_shift = 24;
 268         info->sps[1].enc[0].pte_enc = 0;
 269     } else {
 270         int i = 0;
 271
 272         /* HV KVM has backing store size restrictions */
 273         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 274
 275         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 276             info->flags |= KVM_PPC_1T_SEGMENTS;
 277         }
 278
 279         if (env->mmu_model == POWERPC_MMU_2_06 ||
 280             env->mmu_model == POWERPC_MMU_2_07) {
 281             info->slb_size = 32;
 282         } else {
 283             info->slb_size = 64;
 284         }
 285
 286         /* Standard 4k base page size segment */
 287         info->sps[i].page_shift = 12;
 288         info->sps[i].slb_enc = 0;
 289         info->sps[i].enc[0].page_shift = 12;
 290         info->sps[i].enc[0].pte_enc = 0;
 291         i++;
 292
 293         /* 64K on MMU 2.06 and later */
 294         if (env->mmu_model == POWERPC_MMU_2_06 ||
 295             env->mmu_model == POWERPC_MMU_2_07) {
 296             info->sps[i].page_shift = 16;
 297             info->sps[i].slb_enc = 0x110;
 298             info->sps[i].enc[0].page_shift = 16;
 299             info->sps[i].enc[0].pte_enc = 1;
 300             i++;
 301         }
 302
 303         /* Standard 16M large page size segment */
 304         info->sps[i].page_shift = 24;
 305         info->sps[i].slb_enc = SLB_VSID_L;
 306         info->sps[i].enc[0].page_shift = 24;
 307         info->sps[i].enc[0].pte_enc = 0;
 308     }
 309 }
 310
 311 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 312 {
 313     CPUState *cs = CPU(cpu);
 314     int ret;
 315
 316     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 317         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 318         if (ret == 0) {
 319             return;
 320         }
 321     }
 322
 323     kvm_get_fallback_smmu_info(cpu, info);
 324 }
 325
 326 static long gethugepagesize(const char *mem_path)
 327 {
 328     struct statfs fs;
 329     int ret;
 330
 331     do {
 332         ret = statfs(mem_path, &fs);
 333     } while (ret != 0 && errno == EINTR);
 334
 335     if (ret != 0) {
 336         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 337                 strerror(errno));
 338         exit(1);
 339     }
 340
 341 #define HUGETLBFS_MAGIC       0x958458f6
 342
 343     if (fs.f_type != HUGETLBFS_MAGIC) {
 344         /* Explicit mempath, but it's ordinary pages */
 345         return getpagesize();
 346     }
 347
 348     /* It's hugepage, return the huge page size */
 349     return fs.f_bsize;
 350 }
 351
 352 /*
 353  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 354  * may or may not name the same files / on the same filesystem now as
 355  * when we actually open and map them.  Iterate over the file
 356  * descriptors instead, and use qemu_fd_getpagesize().
 357  */
 358 static int find_max_supported_pagesize(Object *obj, void *opaque)
 359 {
 360     char *mem_path;
 361     long *hpsize_min = opaque;
 362
 363     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 364         mem_path = object_property_get_str(obj, "mem-path", NULL);
 365         if (mem_path) {
 366             long hpsize = gethugepagesize(mem_path);
 367             if (hpsize < *hpsize_min) {
 368                 *hpsize_min = hpsize;
 369             }
 370         } else {
 371             *hpsize_min = getpagesize();
 372         }
 373     }
 374
 375     return 0;
 376 }
 377
 378 static long getrampagesize(void)
 379 {
 380     long hpsize = LONG_MAX;
 381     long mainrampagesize;
 382     Object *memdev_root;
 383
 384     if (mem_path) {
 385         mainrampagesize = gethugepagesize(mem_path);
 386     } else {
 387         mainrampagesize = getpagesize();
 388     }
 389
 390     /* it's possible we have memory-backend objects with
 391      * hugepage-backed RAM. these may get mapped into system
 392      * address space via -numa parameters or memory hotplug
 393      * hooks. we want to take these into account, but we
 394      * also want to make sure these supported hugepage
 395      * sizes are applicable across the entire range of memory
 396      * we may boot from, so we take the min across all
 397      * backends, and assume normal pages in cases where a
 398      * backend isn't backed by hugepages.
 399      */
 400     memdev_root = object_resolve_path("/objects", NULL);
 401     if (memdev_root) {
 402         object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 403     }
 404     if (hpsize == LONG_MAX) {
 405         /* No additional memory regions found ==> Report main RAM page size */
 406         return mainrampagesize;
 407     }
 408
 409     /* If NUMA is disabled or the NUMA nodes are not backed with a
 410      * memory-backend, then there is at least one node using "normal" RAM,
 411      * so if its page size is smaller we have got to report that size instead.
 412      */
 413     if (hpsize > mainrampagesize &&
 414         (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
 415         static bool warned;
 416         if (!warned) {
 417             error_report("Huge page support disabled (n/a for main memory).");
 418             warned = true;
 419         }
 420         return mainrampagesize;
 421     }
 422
 423     return hpsize;
 424 }
 425
 426 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 427 {
 428     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 429         return true;
 430     }
 431
 432     return (1ul << shift) <= rampgsize;
 433 }
 434
 435 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 436 {
 437     static struct kvm_ppc_smmu_info smmu_info;
 438     static bool has_smmu_info;
 439     CPUPPCState *env = &cpu->env;
 440     long rampagesize;
 441     int iq, ik, jq, jk;
 442     bool has_64k_pages = false;
 443
 444     /* We only handle page sizes for 64-bit server guests for now */
 445     if (!(env->mmu_model & POWERPC_MMU_64)) {
 446         return;
 447     }
 448
 449     /* Collect MMU info from kernel if not already */
 450     if (!has_smmu_info) {
 451         kvm_get_smmu_info(cpu, &smmu_info);
 452         has_smmu_info = true;
 453     }
 454
 455     rampagesize = getrampagesize();
 456
 457     /* Convert to QEMU form */
 458     memset(&env->sps, 0, sizeof(env->sps));
 459
 460     /* If we have HV KVM, we need to forbid CI large pages if our
 461      * host page size is smaller than 64K.
 462      */
 463     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 464         env->ci_large_pages = getpagesize() >= 0x10000;
 465     }
 466
 467     /*
 468      * XXX This loop should be an entry wide AND of the capabilities that
 469      *     the selected CPU has with the capabilities that KVM supports.
 470      */
 471     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 472         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 473         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 474
 475         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 476                                  ksps->page_shift)) {
 477             continue;
 478         }
 479         qsps->page_shift = ksps->page_shift;
 480         qsps->slb_enc = ksps->slb_enc;
 481         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 482             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 483                                      ksps->enc[jk].page_shift)) {
 484                 continue;
 485             }
 486             if (ksps->enc[jk].page_shift == 16) {
 487                 has_64k_pages = true;
 488             }
 489             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 490             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 491             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 492                 break;
 493             }
 494         }
 495         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 496             break;
 497         }
 498     }
 499     env->slb_nr = smmu_info.slb_size;
 500     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 501         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 502     }
 503     if (!has_64k_pages) {
 504         env->mmu_model &= ~POWERPC_MMU_64K;
 505     }
 506 }
 507 #else /* defined (TARGET_PPC64) */
 508
 509 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 510 {
 511 }
 512
 513 #endif /* !defined (TARGET_PPC64) */
 514
 515 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 516 {
 517     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 518 }
 519
 520 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 521  * book3s supports only 1 watchpoint, so array size
 522  * of 4 is sufficient for now.
 523  */
 524 #define MAX_HW_BKPTS 4
 525
 526 static struct HWBreakpoint {
 527     target_ulong addr;
 528     int type;
 529 } hw_debug_points[MAX_HW_BKPTS];
 530
 531 static CPUWatchpoint hw_watchpoint;
 532
 533 /* Default there is no breakpoint and watchpoint supported */
 534 static int max_hw_breakpoint;
 535 static int max_hw_watchpoint;
 536 static int nb_hw_breakpoint;
 537 static int nb_hw_watchpoint;
 538
 539 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 540 {
 541     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 542         max_hw_breakpoint = 2;
 543         max_hw_watchpoint = 2;
 544     }
 545
 546     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 547         fprintf(stderr, "Error initializing h/w breakpoints\n");
 548         return;
 549     }
 550 }
 551
 552 int kvm_arch_init_vcpu(CPUState *cs)
 553 {
 554     PowerPCCPU *cpu = POWERPC_CPU(cs);
 555     CPUPPCState *cenv = &cpu->env;
 556     int ret;
 557
 558     /* Gather server mmu info from KVM and update the CPU state */
 559     kvm_fixup_page_sizes(cpu);
 560
 561     /* Synchronize sregs with kvm */
 562     ret = kvm_arch_sync_sregs(cpu);
 563     if (ret) {
 564         if (ret == -EINVAL) {
 565             error_report("Register sync failed... If you're using kvm-hv.ko,"
 566                          " only \"-cpu host\" is possible");
 567         }
 568         return ret;
 569     }
 570
 571     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 572
 573     switch (cenv->mmu_model) {
 574     case POWERPC_MMU_BOOKE206:
 575         /* This target supports access to KVM's guest TLB */
 576         ret = kvm_booke206_tlb_init(cpu);
 577         break;
 578     case POWERPC_MMU_2_07:
 579         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 580             /* KVM-HV has transactional memory on POWER8 also without the
 581              * KVM_CAP_PPC_HTM extension, so enable it here instead. */
 582             cap_htm = true;
 583         }
 584         break;
 585     default:
 586         break;
 587     }
 588
 589     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 590     kvmppc_hw_debug_points_init(cenv);
 591
 592     return ret;
 593 }
 594
 595 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 596 {
 597     CPUPPCState *env = &cpu->env;
 598     CPUState *cs = CPU(cpu);
 599     struct kvm_dirty_tlb dirty_tlb;
 600     unsigned char *bitmap;
 601     int ret;
 602
 603     if (!env->kvm_sw_tlb) {
 604         return;
 605     }
 606
 607     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 608     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 609
 610     dirty_tlb.bitmap = (uintptr_t)bitmap;
 611     dirty_tlb.num_dirty = env->nb_tlb;
 612
 613     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 614     if (ret) {
 615         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 616                 __func__, strerror(-ret));
 617     }
 618
 619     g_free(bitmap);
 620 }
 621
 622 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 623 {
 624     PowerPCCPU *cpu = POWERPC_CPU(cs);
 625     CPUPPCState *env = &cpu->env;
 626     union {
 627         uint32_t u32;
 628         uint64_t u64;
 629     } val;
 630     struct kvm_one_reg reg = {
 631         .id = id,
 632         .addr = (uintptr_t) &val,
 633     };
 634     int ret;
 635
 636     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 637     if (ret != 0) {
 638         trace_kvm_failed_spr_get(spr, strerror(errno));
 639     } else {
 640         switch (id & KVM_REG_SIZE_MASK) {
 641         case KVM_REG_SIZE_U32:
 642             env->spr[spr] = val.u32;
 643             break;
 644
 645         case KVM_REG_SIZE_U64:
 646             env->spr[spr] = val.u64;
 647             break;
 648
 649         default:
 650             /* Don't handle this size yet */
 651             abort();
 652         }
 653     }
 654 }
 655
 656 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 657 {
 658     PowerPCCPU *cpu = POWERPC_CPU(cs);
 659     CPUPPCState *env = &cpu->env;
 660     union {
 661         uint32_t u32;
 662         uint64_t u64;
 663     } val;
 664     struct kvm_one_reg reg = {
 665         .id = id,
 666         .addr = (uintptr_t) &val,
 667     };
 668     int ret;
 669
 670     switch (id & KVM_REG_SIZE_MASK) {
 671     case KVM_REG_SIZE_U32:
 672         val.u32 = env->spr[spr];
 673         break;
 674
 675     case KVM_REG_SIZE_U64:
 676         val.u64 = env->spr[spr];
 677         break;
 678
 679     default:
 680         /* Don't handle this size yet */
 681         abort();
 682     }
 683
 684     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 685     if (ret != 0) {
 686         trace_kvm_failed_spr_set(spr, strerror(errno));
 687     }
 688 }
 689
 690 static int kvm_put_fp(CPUState *cs)
 691 {
 692     PowerPCCPU *cpu = POWERPC_CPU(cs);
 693     CPUPPCState *env = &cpu->env;
 694     struct kvm_one_reg reg;
 695     int i;
 696     int ret;
 697
 698     if (env->insns_flags & PPC_FLOAT) {
 699         uint64_t fpscr = env->fpscr;
 700         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 701
 702         reg.id = KVM_REG_PPC_FPSCR;
 703         reg.addr = (uintptr_t)&fpscr;
 704         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 705         if (ret < 0) {
 706             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 707             return ret;
 708         }
 709
 710         for (i = 0; i < 32; i++) {
 711             uint64_t vsr[2];
 712
 713 #ifdef HOST_WORDS_BIGENDIAN
 714             vsr[0] = float64_val(env->fpr[i]);
 715             vsr[1] = env->vsr[i];
 716 #else
 717             vsr[0] = env->vsr[i];
 718             vsr[1] = float64_val(env->fpr[i]);
 719 #endif
 720             reg.addr = (uintptr_t) &vsr;
 721             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 722
 723             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 724             if (ret < 0) {
 725                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 726                         i, strerror(errno));
 727                 return ret;
 728             }
 729         }
 730     }
 731
 732     if (env->insns_flags & PPC_ALTIVEC) {
 733         reg.id = KVM_REG_PPC_VSCR;
 734         reg.addr = (uintptr_t)&env->vscr;
 735         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 736         if (ret < 0) {
 737             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 738             return ret;
 739         }
 740
 741         for (i = 0; i < 32; i++) {
 742             reg.id = KVM_REG_PPC_VR(i);
 743             reg.addr = (uintptr_t)&env->avr[i];
 744             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 745             if (ret < 0) {
 746                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 747                 return ret;
 748             }
 749         }
 750     }
 751
 752     return 0;
 753 }
 754
 755 static int kvm_get_fp(CPUState *cs)
 756 {
 757     PowerPCCPU *cpu = POWERPC_CPU(cs);
 758     CPUPPCState *env = &cpu->env;
 759     struct kvm_one_reg reg;
 760     int i;
 761     int ret;
 762
 763     if (env->insns_flags & PPC_FLOAT) {
 764         uint64_t fpscr;
 765         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 766
 767         reg.id = KVM_REG_PPC_FPSCR;
 768         reg.addr = (uintptr_t)&fpscr;
 769         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 770         if (ret < 0) {
 771             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 772             return ret;
 773         } else {
 774             env->fpscr = fpscr;
 775         }
 776
 777         for (i = 0; i < 32; i++) {
 778             uint64_t vsr[2];
 779
 780             reg.addr = (uintptr_t) &vsr;
 781             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 782
 783             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 784             if (ret < 0) {
 785                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 786                         vsx ? "VSR" : "FPR", i, strerror(errno));
 787                 return ret;
 788             } else {
 789 #ifdef HOST_WORDS_BIGENDIAN
 790                 env->fpr[i] = vsr[0];
 791                 if (vsx) {
 792                     env->vsr[i] = vsr[1];
 793                 }
 794 #else
 795                 env->fpr[i] = vsr[1];
 796                 if (vsx) {
 797                     env->vsr[i] = vsr[0];
 798                 }
 799 #endif
 800             }
 801         }
 802     }
 803
 804     if (env->insns_flags & PPC_ALTIVEC) {
 805         reg.id = KVM_REG_PPC_VSCR;
 806         reg.addr = (uintptr_t)&env->vscr;
 807         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 808         if (ret < 0) {
 809             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 810             return ret;
 811         }
 812
 813         for (i = 0; i < 32; i++) {
 814             reg.id = KVM_REG_PPC_VR(i);
 815             reg.addr = (uintptr_t)&env->avr[i];
 816             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 817             if (ret < 0) {
 818                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 819                         i, strerror(errno));
 820                 return ret;
 821             }
 822         }
 823     }
 824
 825     return 0;
 826 }
 827
 828 #if defined(TARGET_PPC64)
 829 static int kvm_get_vpa(CPUState *cs)
 830 {
 831     PowerPCCPU *cpu = POWERPC_CPU(cs);
 832     CPUPPCState *env = &cpu->env;
 833     struct kvm_one_reg reg;
 834     int ret;
 835
 836     reg.id = KVM_REG_PPC_VPA_ADDR;
 837     reg.addr = (uintptr_t)&env->vpa_addr;
 838     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 839     if (ret < 0) {
 840         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 841         return ret;
 842     }
 843
 844     assert((uintptr_t)&env->slb_shadow_size
 845            == ((uintptr_t)&env->slb_shadow_addr + 8));
 846     reg.id = KVM_REG_PPC_VPA_SLB;
 847     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 848     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 849     if (ret < 0) {
 850         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 851                 strerror(errno));
 852         return ret;
 853     }
 854
 855     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 856     reg.id = KVM_REG_PPC_VPA_DTL;
 857     reg.addr = (uintptr_t)&env->dtl_addr;
 858     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 859     if (ret < 0) {
 860         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 861                 strerror(errno));
 862         return ret;
 863     }
 864
 865     return 0;
 866 }
 867
 868 static int kvm_put_vpa(CPUState *cs)
 869 {
 870     PowerPCCPU *cpu = POWERPC_CPU(cs);
 871     CPUPPCState *env = &cpu->env;
 872     struct kvm_one_reg reg;
 873     int ret;
 874
 875     /* SLB shadow or DTL can't be registered unless a master VPA is
 876      * registered.  That means when restoring state, if a VPA *is*
 877      * registered, we need to set that up first.  If not, we need to
 878      * deregister the others before deregistering the master VPA */
 879     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 880
 881     if (env->vpa_addr) {
 882         reg.id = KVM_REG_PPC_VPA_ADDR;
 883         reg.addr = (uintptr_t)&env->vpa_addr;
 884         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 885         if (ret < 0) {
 886             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 887             return ret;
 888         }
 889     }
 890
 891     assert((uintptr_t)&env->slb_shadow_size
 892            == ((uintptr_t)&env->slb_shadow_addr + 8));
 893     reg.id = KVM_REG_PPC_VPA_SLB;
 894     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 895     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 896     if (ret < 0) {
 897         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 898         return ret;
 899     }
 900
 901     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 902     reg.id = KVM_REG_PPC_VPA_DTL;
 903     reg.addr = (uintptr_t)&env->dtl_addr;
 904     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 905     if (ret < 0) {
 906         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 907                 strerror(errno));
 908         return ret;
 909     }
 910
 911     if (!env->vpa_addr) {
 912         reg.id = KVM_REG_PPC_VPA_ADDR;
 913         reg.addr = (uintptr_t)&env->vpa_addr;
 914         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 915         if (ret < 0) {
 916             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 917             return ret;
 918         }
 919     }
 920
 921     return 0;
 922 }
 923 #endif /* TARGET_PPC64 */
 924
 925 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 926 {
 927     CPUPPCState *env = &cpu->env;
 928     struct kvm_sregs sregs;
 929     int i;
 930
 931     sregs.pvr = env->spr[SPR_PVR];
 932
 933     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 934
 935     /* Sync SLB */
 936 #ifdef TARGET_PPC64
 937     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 938         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 939         if (env->slb[i].esid & SLB_ESID_V) {
 940             sregs.u.s.ppc64.slb[i].slbe |= i;
 941         }
 942         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 943     }
 944 #endif
 945
 946     /* Sync SRs */
 947     for (i = 0; i < 16; i++) {
 948         sregs.u.s.ppc32.sr[i] = env->sr[i];
 949     }
 950
 951     /* Sync BATs */
 952     for (i = 0; i < 8; i++) {
 953         /* Beware. We have to swap upper and lower bits here */
 954         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 955             | env->DBAT[1][i];
 956         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 957             | env->IBAT[1][i];
 958     }
 959
 960     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 961 }
 962
 963 int kvm_arch_put_registers(CPUState *cs, int level)
 964 {
 965     PowerPCCPU *cpu = POWERPC_CPU(cs);
 966     CPUPPCState *env = &cpu->env;
 967     struct kvm_regs regs;
 968     int ret;
 969     int i;
 970
 971     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 972     if (ret < 0) {
 973         return ret;
 974     }
 975
 976     regs.ctr = env->ctr;
 977     regs.lr  = env->lr;
 978     regs.xer = cpu_read_xer(env);
 979     regs.msr = env->msr;
 980     regs.pc = env->nip;
 981
 982     regs.srr0 = env->spr[SPR_SRR0];
 983     regs.srr1 = env->spr[SPR_SRR1];
 984
 985     regs.sprg0 = env->spr[SPR_SPRG0];
 986     regs.sprg1 = env->spr[SPR_SPRG1];
 987     regs.sprg2 = env->spr[SPR_SPRG2];
 988     regs.sprg3 = env->spr[SPR_SPRG3];
 989     regs.sprg4 = env->spr[SPR_SPRG4];
 990     regs.sprg5 = env->spr[SPR_SPRG5];
 991     regs.sprg6 = env->spr[SPR_SPRG6];
 992     regs.sprg7 = env->spr[SPR_SPRG7];
 993
 994     regs.pid = env->spr[SPR_BOOKE_PID];
 995
 996     for (i = 0;i < 32; i++)
 997         regs.gpr[i] = env->gpr[i];
 998
 999     regs.cr = 0;
1000     for (i = 0; i < 8; i++) {
1001         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1002     }
1003
1004     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1005     if (ret < 0)
1006         return ret;
1007
1008     kvm_put_fp(cs);
1009
1010     if (env->tlb_dirty) {
1011         kvm_sw_tlb_put(cpu);
1012         env->tlb_dirty = false;
1013     }
1014
1015     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1016         ret = kvmppc_put_books_sregs(cpu);
1017         if (ret < 0) {
1018             return ret;
1019         }
1020     }
1021
1022     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1023         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1024     }
1025
1026     if (cap_one_reg) {
1027         int i;
1028
1029         /* We deliberately ignore errors here, for kernels which have
1030          * the ONE_REG calls, but don't support the specific
1031          * registers, there's a reasonable chance things will still
1032          * work, at least until we try to migrate. */
1033         for (i = 0; i < 1024; i++) {
1034             uint64_t id = env->spr_cb[i].one_reg_id;
1035
1036             if (id != 0) {
1037                 kvm_put_one_spr(cs, id, i);
1038             }
1039         }
1040
1041 #ifdef TARGET_PPC64
1042         if (msr_ts) {
1043             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1044                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1045             }
1046             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1047                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1048             }
1049             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1050             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1051             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1052             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1053             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1059         }
1060
1061         if (cap_papr) {
1062             if (kvm_put_vpa(cs) < 0) {
1063                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1064             }
1065         }
1066
1067         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1068 #endif /* TARGET_PPC64 */
1069     }
1070
1071     return ret;
1072 }
1073
1074 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1075 {
1076      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1077 }
1078
1079 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1080 {
1081     CPUPPCState *env = &cpu->env;
1082     struct kvm_sregs sregs;
1083     int ret;
1084
1085     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1086     if (ret < 0) {
1087         return ret;
1088     }
1089
1090     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1091         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1092         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1093         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1094         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1095         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1096         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1097         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1098         env->spr[SPR_DECR] = sregs.u.e.dec;
1099         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1100         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1101         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1102     }
1103
1104     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1105         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1106         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1107         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1108         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1109         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1110     }
1111
1112     if (sregs.u.e.features & KVM_SREGS_E_64) {
1113         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1114     }
1115
1116     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1117         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1118     }
1119
1120     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1121         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1122         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1123         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1124         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1125         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1126         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1127         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1128         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1129         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1130         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1131         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1132         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1133         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1134         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1135         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1136         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1137         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1138         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1139         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1140         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1141         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1142         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1143         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1144         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1145         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1146         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1147         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1148         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1149         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1150         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1151         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1152         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1153
1154         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1155             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1156             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1157             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1158             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1159             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1160             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1161         }
1162
1163         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1164             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1165             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1166         }
1167
1168         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1169             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1170             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1171             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1172             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1173         }
1174     }
1175
1176     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1177         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1178         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1179         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1180         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1181         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1182         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1183         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1184         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1185         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1186         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1187     }
1188
1189     if (sregs.u.e.features & KVM_SREGS_EXP) {
1190         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1191     }
1192
1193     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1194         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1195         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1196     }
1197
1198     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1199         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1200         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1201         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1202
1203         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1204             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1205             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1206         }
1207     }
1208
1209     return 0;
1210 }
1211
1212 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1213 {
1214     CPUPPCState *env = &cpu->env;
1215     struct kvm_sregs sregs;
1216     int ret;
1217     int i;
1218
1219     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1220     if (ret < 0) {
1221         return ret;
1222     }
1223
1224     if (!env->external_htab) {
1225         ppc_store_sdr1(env, sregs.u.s.sdr1);
1226     }
1227
1228     /* Sync SLB */
1229 #ifdef TARGET_PPC64
1230     /*
1231      * The packed SLB array we get from KVM_GET_SREGS only contains
1232      * information about valid entries. So we flush our internal copy
1233      * to get rid of stale ones, then put all valid SLB entries back
1234      * in.
1235      */
1236     memset(env->slb, 0, sizeof(env->slb));
1237     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1238         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1239         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1240         /*
1241          * Only restore valid entries
1242          */
1243         if (rb & SLB_ESID_V) {
1244             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1245         }
1246     }
1247 #endif
1248
1249     /* Sync SRs */
1250     for (i = 0; i < 16; i++) {
1251         env->sr[i] = sregs.u.s.ppc32.sr[i];
1252     }
1253
1254     /* Sync BATs */
1255     for (i = 0; i < 8; i++) {
1256         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1257         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1258         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1259         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1260     }
1261
1262     return 0;
1263 }
1264
1265 int kvm_arch_get_registers(CPUState *cs)
1266 {
1267     PowerPCCPU *cpu = POWERPC_CPU(cs);
1268     CPUPPCState *env = &cpu->env;
1269     struct kvm_regs regs;
1270     uint32_t cr;
1271     int i, ret;
1272
1273     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1274     if (ret < 0)
1275         return ret;
1276
1277     cr = regs.cr;
1278     for (i = 7; i >= 0; i--) {
1279         env->crf[i] = cr & 15;
1280         cr >>= 4;
1281     }
1282
1283     env->ctr = regs.ctr;
1284     env->lr = regs.lr;
1285     cpu_write_xer(env, regs.xer);
1286     env->msr = regs.msr;
1287     env->nip = regs.pc;
1288
1289     env->spr[SPR_SRR0] = regs.srr0;
1290     env->spr[SPR_SRR1] = regs.srr1;
1291
1292     env->spr[SPR_SPRG0] = regs.sprg0;
1293     env->spr[SPR_SPRG1] = regs.sprg1;
1294     env->spr[SPR_SPRG2] = regs.sprg2;
1295     env->spr[SPR_SPRG3] = regs.sprg3;
1296     env->spr[SPR_SPRG4] = regs.sprg4;
1297     env->spr[SPR_SPRG5] = regs.sprg5;
1298     env->spr[SPR_SPRG6] = regs.sprg6;
1299     env->spr[SPR_SPRG7] = regs.sprg7;
1300
1301     env->spr[SPR_BOOKE_PID] = regs.pid;
1302
1303     for (i = 0;i < 32; i++)
1304         env->gpr[i] = regs.gpr[i];
1305
1306     kvm_get_fp(cs);
1307
1308     if (cap_booke_sregs) {
1309         ret = kvmppc_get_booke_sregs(cpu);
1310         if (ret < 0) {
1311             return ret;
1312         }
1313     }
1314
1315     if (cap_segstate) {
1316         ret = kvmppc_get_books_sregs(cpu);
1317         if (ret < 0) {
1318             return ret;
1319         }
1320     }
1321
1322     if (cap_hior) {
1323         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1324     }
1325
1326     if (cap_one_reg) {
1327         int i;
1328
1329         /* We deliberately ignore errors here, for kernels which have
1330          * the ONE_REG calls, but don't support the specific
1331          * registers, there's a reasonable chance things will still
1332          * work, at least until we try to migrate. */
1333         for (i = 0; i < 1024; i++) {
1334             uint64_t id = env->spr_cb[i].one_reg_id;
1335
1336             if (id != 0) {
1337                 kvm_get_one_spr(cs, id, i);
1338             }
1339         }
1340
1341 #ifdef TARGET_PPC64
1342         if (msr_ts) {
1343             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1344                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1345             }
1346             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1347                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1348             }
1349             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1350             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1351             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1352             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1353             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1359         }
1360
1361         if (cap_papr) {
1362             if (kvm_get_vpa(cs) < 0) {
1363                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1364             }
1365         }
1366
1367         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1368 #endif
1369     }
1370
1371     return 0;
1372 }
1373
1374 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1375 {
1376     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1377
1378     if (irq != PPC_INTERRUPT_EXT) {
1379         return 0;
1380     }
1381
1382     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1383         return 0;
1384     }
1385
1386     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1387
1388     return 0;
1389 }
1390
/*
 * PPC_INPUT_INT names the interrupt input pin whose bit is tested in
 * env->irq_input_state by kvm_arch_pre_run().  Which constant it maps to
 * depends on the target being built: 40x for TARGET_PPCEMB, 970 for
 * TARGET_PPC64, 6xx otherwise.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1398
1399 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1400 {
1401     PowerPCCPU *cpu = POWERPC_CPU(cs);
1402     CPUPPCState *env = &cpu->env;
1403     int r;
1404     unsigned irq;
1405
1406     qemu_mutex_lock_iothread();
1407
1408     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1409      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1410     if (!cap_interrupt_level &&
1411         run->ready_for_interrupt_injection &&
1412         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1413         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1414     {
1415         /* For now KVM disregards the 'irq' argument. However, in the
1416          * future KVM could cache it in-kernel to avoid a heavyweight exit
1417          * when reading the UIC.
1418          */
1419         irq = KVM_INTERRUPT_SET;
1420
1421         DPRINTF("injected interrupt %d\n", irq);
1422         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1423         if (r < 0) {
1424             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1425         }
1426
1427         /* Always wake up soon in case the interrupt was level based */
1428         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1429                        (NANOSECONDS_PER_SECOND / 50));
1430     }
1431
1432     /* We don't know if there are more interrupts pending after this. However,
1433      * the guest will return to userspace in the course of handling this one
1434      * anyways, so we will get a chance to deliver the rest. */
1435
1436     qemu_mutex_unlock_iothread();
1437 }
1438
1439 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1440 {
1441     return MEMTXATTRS_UNSPECIFIED;
1442 }
1443
1444 int kvm_arch_process_async_events(CPUState *cs)
1445 {
1446     return cs->halted;
1447 }
1448
1449 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1450 {
1451     CPUState *cs = CPU(cpu);
1452     CPUPPCState *env = &cpu->env;
1453
1454     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1455         cs->halted = 1;
1456         cs->exception_index = EXCP_HLT;
1457     }
1458
1459     return 0;
1460 }
1461
1462 /* map dcr access to existing qemu dcr emulation */
1463 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1464 {
1465     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1466         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1467
1468     return 0;
1469 }
1470
1471 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1472 {
1473     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1474         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1475
1476     return 0;
1477 }
1478
1479 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1480 {
1481     /* Mixed endian case is not handled */
1482     uint32_t sc = debug_inst_opcode;
1483
1484     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1485                             sizeof(sc), 0) ||
1486         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1487         return -EINVAL;
1488     }
1489
1490     return 0;
1491 }
1492
1493 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1494 {
1495     uint32_t sc;
1496
1497     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1498         sc != debug_inst_opcode ||
1499         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1500                             sizeof(sc), 1)) {
1501         return -EINVAL;
1502     }
1503
1504     return 0;
1505 }
1506
1507 static int find_hw_breakpoint(target_ulong addr, int type)
1508 {
1509     int n;
1510
1511     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1512            <= ARRAY_SIZE(hw_debug_points));
1513
1514     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1515         if (hw_debug_points[n].addr == addr &&
1516              hw_debug_points[n].type == type) {
1517             return n;
1518         }
1519     }
1520
1521     return -1;
1522 }
1523
1524 static int find_hw_watchpoint(target_ulong addr, int *flag)
1525 {
1526     int n;
1527
1528     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1529     if (n >= 0) {
1530         *flag = BP_MEM_ACCESS;
1531         return n;
1532     }
1533
1534     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1535     if (n >= 0) {
1536         *flag = BP_MEM_WRITE;
1537         return n;
1538     }
1539
1540     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1541     if (n >= 0) {
1542         *flag = BP_MEM_READ;
1543         return n;
1544     }
1545
1546     return -1;
1547 }
1548
1549 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1550                                   target_ulong len, int type)
1551 {
1552     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1553         return -ENOBUFS;
1554     }
1555
1556     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1557     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1558
1559     switch (type) {
1560     case GDB_BREAKPOINT_HW:
1561         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1562             return -ENOBUFS;
1563         }
1564
1565         if (find_hw_breakpoint(addr, type) >= 0) {
1566             return -EEXIST;
1567         }
1568
1569         nb_hw_breakpoint++;
1570         break;
1571
1572     case GDB_WATCHPOINT_WRITE:
1573     case GDB_WATCHPOINT_READ:
1574     case GDB_WATCHPOINT_ACCESS:
1575         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1576             return -ENOBUFS;
1577         }
1578
1579         if (find_hw_breakpoint(addr, type) >= 0) {
1580             return -EEXIST;
1581         }
1582
1583         nb_hw_watchpoint++;
1584         break;
1585
1586     default:
1587         return -ENOSYS;
1588     }
1589
1590     return 0;
1591 }
1592
1593 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1594                                   target_ulong len, int type)
1595 {
1596     int n;
1597
1598     n = find_hw_breakpoint(addr, type);
1599     if (n < 0) {
1600         return -ENOENT;
1601     }
1602
1603     switch (type) {
1604     case GDB_BREAKPOINT_HW:
1605         nb_hw_breakpoint--;
1606         break;
1607
1608     case GDB_WATCHPOINT_WRITE:
1609     case GDB_WATCHPOINT_READ:
1610     case GDB_WATCHPOINT_ACCESS:
1611         nb_hw_watchpoint--;
1612         break;
1613
1614     default:
1615         return -ENOSYS;
1616     }
1617     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1618
1619     return 0;
1620 }
1621
1622 void kvm_arch_remove_all_hw_breakpoints(void)
1623 {
1624     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1625 }
1626
1627 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1628 {
1629     int n;
1630
1631     /* Software Breakpoint updates */
1632     if (kvm_sw_breakpoints_active(cs)) {
1633         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1634     }
1635
1636     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1637            <= ARRAY_SIZE(hw_debug_points));
1638     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1639
1640     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1641         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1642         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1643         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1644             switch (hw_debug_points[n].type) {
1645             case GDB_BREAKPOINT_HW:
1646                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1647                 break;
1648             case GDB_WATCHPOINT_WRITE:
1649                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1650                 break;
1651             case GDB_WATCHPOINT_READ:
1652                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1653                 break;
1654             case GDB_WATCHPOINT_ACCESS:
1655                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1656                                         KVMPPC_DEBUG_WATCH_READ;
1657                 break;
1658             default:
1659                 cpu_abort(cs, "Unsupported breakpoint type\n");
1660             }
1661             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1662         }
1663     }
1664 }
1665
1666 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1667 {
1668     CPUState *cs = CPU(cpu);
1669     CPUPPCState *env = &cpu->env;
1670     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1671     int handle = 0;
1672     int n;
1673     int flag = 0;
1674
1675     if (cs->singlestep_enabled) {
1676         handle = 1;
1677     } else if (arch_info->status) {
1678         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1679             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1680                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1681                 if (n >= 0) {
1682                     handle = 1;
1683                 }
1684             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1685                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1686                 n = find_hw_watchpoint(arch_info->address,  &flag);
1687                 if (n >= 0) {
1688                     handle = 1;
1689                     cs->watchpoint_hit = &hw_watchpoint;
1690                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1691                     hw_watchpoint.flags = flag;
1692                 }
1693             }
1694         }
1695     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1696         handle = 1;
1697     } else {
1698         /* QEMU is not able to handle debug exception, so inject
1699          * program exception to guest;
1700          * Yes program exception NOT debug exception !!
1701          * When QEMU is using debug resources then debug exception must
1702          * be always set. To achieve this we set MSR_DE and also set
1703          * MSRP_DEP so guest cannot change MSR_DE.
1704          * When emulating debug resource for guest we want guest
1705          * to control MSR_DE (enable/disable debug interrupt on need).
1706          * Supporting both configurations are NOT possible.
1707          * So the result is that we cannot share debug resources
1708          * between QEMU and Guest on BOOKE architecture.
1709          * In the current design QEMU gets the priority over guest,
1710          * this means that if QEMU is using debug resources then guest
1711          * cannot use them;
1712          * For software breakpoint QEMU uses a privileged instruction;
1713          * So there cannot be any reason that we are here for guest
1714          * set debug exception, only possibility is guest executed a
1715          * privileged / illegal instruction and that's why we are
1716          * injecting a program interrupt.
1717          */
1718
1719         cpu_synchronize_state(cs);
1720         /* env->nip is PC, so increment this by 4 to use
1721          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1722          */
1723         env->nip += 4;
1724         cs->exception_index = POWERPC_EXCP_PROGRAM;
1725         env->error_code = POWERPC_EXCP_INVAL;
1726         ppc_cpu_do_interrupt(cs);
1727     }
1728
1729     return handle;
1730 }
1731
1732 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1733 {
1734     PowerPCCPU *cpu = POWERPC_CPU(cs);
1735     CPUPPCState *env = &cpu->env;
1736     int ret;
1737
1738     qemu_mutex_lock_iothread();
1739
1740     switch (run->exit_reason) {
1741     case KVM_EXIT_DCR:
1742         if (run->dcr.is_write) {
1743             DPRINTF("handle dcr write\n");
1744             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1745         } else {
1746             DPRINTF("handle dcr read\n");
1747             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1748         }
1749         break;
1750     case KVM_EXIT_HLT:
1751         DPRINTF("handle halt\n");
1752         ret = kvmppc_handle_halt(cpu);
1753         break;
1754 #if defined(TARGET_PPC64)
1755     case KVM_EXIT_PAPR_HCALL:
1756         DPRINTF("handle PAPR hypercall\n");
1757         run->papr_hcall.ret = spapr_hypercall(cpu,
1758                                               run->papr_hcall.nr,
1759                                               run->papr_hcall.args);
1760         ret = 0;
1761         break;
1762 #endif
1763     case KVM_EXIT_EPR:
1764         DPRINTF("handle epr\n");
1765         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1766         ret = 0;
1767         break;
1768     case KVM_EXIT_WATCHDOG:
1769         DPRINTF("handle watchdog expiry\n");
1770         watchdog_perform_action();
1771         ret = 0;
1772         break;
1773
1774     case KVM_EXIT_DEBUG:
1775         DPRINTF("handle debug exception\n");
1776         if (kvm_handle_debug(cpu, run)) {
1777             ret = EXCP_DEBUG;
1778             break;
1779         }
1780         /* re-enter, this exception was guest-internal */
1781         ret = 0;
1782         break;
1783
1784     default:
1785         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1786         ret = -1;
1787         break;
1788     }
1789
1790     qemu_mutex_unlock_iothread();
1791     return ret;
1792 }
1793
1794 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1795 {
1796     CPUState *cs = CPU(cpu);
1797     uint32_t bits = tsr_bits;
1798     struct kvm_one_reg reg = {
1799         .id = KVM_REG_PPC_OR_TSR,
1800         .addr = (uintptr_t) &bits,
1801     };
1802
1803     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1804 }
1805
1806 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1807 {
1808
1809     CPUState *cs = CPU(cpu);
1810     uint32_t bits = tsr_bits;
1811     struct kvm_one_reg reg = {
1812         .id = KVM_REG_PPC_CLEAR_TSR,
1813         .addr = (uintptr_t) &bits,
1814     };
1815
1816     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1817 }
1818
1819 int kvmppc_set_tcr(PowerPCCPU *cpu)
1820 {
1821     CPUState *cs = CPU(cpu);
1822     CPUPPCState *env = &cpu->env;
1823     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1824
1825     struct kvm_one_reg reg = {
1826         .id = KVM_REG_PPC_TCR,
1827         .addr = (uintptr_t) &tcr,
1828     };
1829
1830     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1831 }
1832
1833 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1834 {
1835     CPUState *cs = CPU(cpu);
1836     int ret;
1837
1838     if (!kvm_enabled()) {
1839         return -1;
1840     }
1841
1842     if (!cap_ppc_watchdog) {
1843         printf("warning: KVM does not support watchdog");
1844         return -1;
1845     }
1846
1847     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1848     if (ret < 0) {
1849         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1850                 __func__, strerror(-ret));
1851         return ret;
1852     }
1853
1854     return ret;
1855 }
1856
/*
 * Scan /proc/cpuinfo for the first line that begins with @field and copy
 * that whole line into @value (bounded by @len through pstrcpy()).
 *
 * Returns 0 when a matching line was copied, -1 when the file could not
 * be opened or no line matched.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    for (;;) {
        if (fgets(line, sizeof(line), cpuinfo) == NULL) {
            break;
        }
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop as soon as a line comes back as an empty string */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(cpuinfo);

    return ret;
}
1884
1885 uint32_t kvmppc_get_tbfreq(void)
1886 {
1887     char line[512];
1888     char *ns;
1889     uint32_t retval = NANOSECONDS_PER_SECOND;
1890
1891     if (read_cpuinfo("timebase", line, sizeof(line))) {
1892         return retval;
1893     }
1894
1895     if (!(ns = strchr(line, ':'))) {
1896         return retval;
1897     }
1898
1899     ns++;
1900
1901     return atoi(ns);
1902 }
1903
/*
 * Load the contents of /proc/device-tree/system-id into a buffer stored
 * in *value.  Returns the success flag of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char system_id_path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(system_id_path, value, NULL, NULL);
}
1909
/*
 * Load the contents of /proc/device-tree/model into a buffer stored in
 * *value.  Returns the success flag of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char model_path[] = "/proc/device-tree/model";

    return g_file_get_contents(model_path, value, NULL, NULL);
}
1914
1915 /* Try to find a device tree node for a CPU with clock-frequency property */
1916 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1917 {
1918     struct dirent *dirp;
1919     DIR *dp;
1920
1921     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1922         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1923         return -1;
1924     }
1925
1926     buf[0] = '\0';
1927     while ((dirp = readdir(dp)) != NULL) {
1928         FILE *f;
1929         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1930                  dirp->d_name);
1931         f = fopen(buf, "r");
1932         if (f) {
1933             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1934             fclose(f);
1935             break;
1936         }
1937         buf[0] = '\0';
1938     }
1939     closedir(dp);
1940     if (buf[0] == '\0') {
1941         printf("Unknown host!\n");
1942         return -1;
1943     }
1944
1945     return 0;
1946 }
1947
/*
 * Read a big-endian integer property from @filename.
 *
 * Returns the value converted to host byte order when the file yields
 * exactly 4 or 8 bytes, (uint64_t)-1 when the file cannot be opened and
 * 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1974
1975 /* Read a CPU node property from the host device tree that's a single
1976  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1977  * (can't find or open the property, or doesn't understand the
1978  * format) */
1979 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1980 {
1981     char buf[PATH_MAX], *tmp;
1982     uint64_t val;
1983
1984     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1985         return -1;
1986     }
1987
1988     tmp = g_strdup_printf("%s/%s", buf, propname);
1989     val = kvmppc_read_int_dt(tmp);
1990     g_free(tmp);
1991
1992     return val;
1993 }
1994
/* Return the host CPU node's "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t clock_freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return clock_freq;
}
1999
/*
 * Return the host CPU node's "ibm,vmx" device tree property, truncated
 * to 32 bits (so a failed lookup shows up as (uint32_t)-1).
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,vmx");

    return raw;
}
2004
/*
 * Return the host CPU node's "ibm,dfp" device tree property, truncated
 * to 32 bits (so a failed lookup shows up as (uint32_t)-1).
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,dfp");

    return raw;
}
2009
2010 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2011  {
2012      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2013      CPUState *cs = CPU(cpu);
2014
2015     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2016         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2017         return 0;
2018     }
2019
2020     return 1;
2021 }
2022
2023 int kvmppc_get_hasidle(CPUPPCState *env)
2024 {
2025     struct kvm_ppc_pvinfo pvinfo;
2026
2027     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2028         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2029         return 1;
2030     }
2031
2032     return 0;
2033 }
2034
2035 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2036 {
2037     uint32_t *hc = (uint32_t*)buf;
2038     struct kvm_ppc_pvinfo pvinfo;
2039
2040     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2041         memcpy(buf, pvinfo.hcall, buf_len);
2042         return 0;
2043     }
2044
2045     /*
2046      * Fallback to always fail hypercalls regardless of endianness:
2047      *
2048      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2049      *     li r3, -1
2050      *     b .+8       (becomes nop in wrong endian)
2051      *     bswap32(li r3, -1)
2052      */
2053
2054     hc[0] = cpu_to_be32(0x08000048);
2055     hc[1] = cpu_to_be32(0x3860ffff);
2056     hc[2] = cpu_to_be32(0x48000008);
2057     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2058
2059     return 1;
2060 }
2061
2062 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2063 {
2064     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2065 }
2066
2067 void kvmppc_enable_logical_ci_hcalls(void)
2068 {
2069     /*
2070      * FIXME: it would be nice if we could detect the cases where
2071      * we're using a device which requires the in kernel
2072      * implementation of these hcalls, but the kernel lacks them and
2073      * produce a warning.
2074      */
2075     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2076     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2077 }
2078
2079 void kvmppc_enable_set_mode_hcall(void)
2080 {
2081     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2082 }
2083
2084 void kvmppc_enable_clear_ref_mod_hcalls(void)
2085 {
2086     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2087     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2088 }
2089
2090 void kvmppc_set_papr(PowerPCCPU *cpu)
2091 {
2092     CPUState *cs = CPU(cpu);
2093     int ret;
2094
2095     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2096     if (ret) {
2097         error_report("This vCPU type or KVM version does not support PAPR");
2098         exit(1);
2099     }
2100
2101     /* Update the capability flag so we sync the right information
2102      * with kvm */
2103     cap_papr = 1;
2104 }
2105
2106 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2107 {
2108     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2109 }
2110
2111 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2112 {
2113     CPUState *cs = CPU(cpu);
2114     int ret;
2115
2116     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2117     if (ret && mpic_proxy) {
2118         error_report("This KVM version does not support EPR");
2119         exit(1);
2120     }
2121 }
2122
2123 int kvmppc_smt_threads(void)
2124 {
2125     return cap_ppc_smt ? cap_ppc_smt : 1;
2126 }
2127
2128 #ifdef TARGET_PPC64
2129 off_t kvmppc_alloc_rma(void **rma)
2130 {
2131     off_t size;
2132     int fd;
2133     struct kvm_allocate_rma ret;
2134
2135     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2136      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2137      *                      not necessary on this hardware
2138      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2139      *
2140      * FIXME: We should allow the user to force contiguous RMA
2141      * allocation in the cap_ppc_rma==1 case.
2142      */
2143     if (cap_ppc_rma < 2) {
2144         return 0;
2145     }
2146
2147     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2148     if (fd < 0) {
2149         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2150                 strerror(errno));
2151         return -1;
2152     }
2153
2154     size = MIN(ret.rma_size, 256ul << 20);
2155
2156     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2157     if (*rma == MAP_FAILED) {
2158         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2159         return -1;
2160     };
2161
2162     return size;
2163 }
2164
2165 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2166 {
2167     struct kvm_ppc_smmu_info info;
2168     long rampagesize, best_page_shift;
2169     int i;
2170
2171     if (cap_ppc_rma >= 2) {
2172         return current_size;
2173     }
2174
2175     /* Find the largest hardware supported page size that's less than
2176      * or equal to the (logical) backing page size of guest RAM */
2177     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2178     rampagesize = getrampagesize();
2179     best_page_shift = 0;
2180
2181     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2182         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2183
2184         if (!sps->page_shift) {
2185             continue;
2186         }
2187
2188         if ((sps->page_shift > best_page_shift)
2189             && ((1UL << sps->page_shift) <= rampagesize)) {
2190             best_page_shift = sps->page_shift;
2191         }
2192     }
2193
2194     return MIN(current_size,
2195                1ULL << (best_page_shift + hash_shift - 7));
2196 }
2197 #endif
2198
2199 bool kvmppc_spapr_use_multitce(void)
2200 {
2201     return cap_spapr_multitce;
2202 }
2203
2204 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2205                               bool need_vfio)
2206 {
2207     struct kvm_create_spapr_tce args = {
2208         .liobn = liobn,
2209         .window_size = window_size,
2210     };
2211     long len;
2212     int fd;
2213     void *table;
2214
2215     /* Must set fd to -1 so we don't try to munmap when called for
2216      * destroying the table, which the upper layers -will- do
2217      */
2218     *pfd = -1;
2219     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2220         return NULL;
2221     }
2222
2223     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2224     if (fd < 0) {
2225         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2226                 liobn);
2227         return NULL;
2228     }
2229
2230     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2231     /* FIXME: round this up to page size */
2232
2233     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2234     if (table == MAP_FAILED) {
2235         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2236                 liobn);
2237         close(fd);
2238         return NULL;
2239     }
2240
2241     *pfd = fd;
2242     return table;
2243 }
2244
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its
 * descriptor.
 *
 * Returns -1 without doing anything when @fd is negative, otherwise 0.
 * A munmap() or close() failure is only reported on stderr; note that
 * close() is not attempted when munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the message so it does not run into later output */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2263
2264 int kvmppc_reset_htab(int shift_hint)
2265 {
2266     uint32_t shift = shift_hint;
2267
2268     if (!kvm_enabled()) {
2269         /* Full emulation, tell caller to allocate htab itself */
2270         return 0;
2271     }
2272     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2273         int ret;
2274         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2275         if (ret == -ENOTTY) {
2276             /* At least some versions of PR KVM advertise the
2277              * capability, but don't implement the ioctl().  Oops.
2278              * Return 0 so that we allocate the htab in qemu, as is
2279              * correct for PR. */
2280             return 0;
2281         } else if (ret < 0) {
2282             return ret;
2283         }
2284         return shift;
2285     }
2286
2287     /* We have a kernel that predates the htab reset calls.  For PR
2288      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2289      * this era, it has allocated a 16MB fixed size hash table already. */
2290     if (kvmppc_is_pr(kvm_state)) {
2291         /* PR - tell caller to allocate htab */
2292         return 0;
2293     } else {
2294         /* HV - assume 16MB kernel allocated htab */
2295         return 24;
2296     }
2297 }
2298
2299 static inline uint32_t mfpvr(void)
2300 {
2301     uint32_t pvr;
2302
2303     asm ("mfpvr %0"
2304          : "=r"(pvr));
2305     return pvr;
2306 }
2307
/*
 * Set (@on true) or clear (@on false) the bits of @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2316
2317 static void kvmppc_host_cpu_initfn(Object *obj)
2318 {
2319     assert(kvm_enabled());
2320 }
2321
2322 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2323 {
2324     DeviceClass *dc = DEVICE_CLASS(oc);
2325     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2326     uint32_t vmx = kvmppc_get_vmx();
2327     uint32_t dfp = kvmppc_get_dfp();
2328     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2329     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2330
2331     /* Now fix up the class with information we can query from the host */
2332     pcc->pvr = mfpvr();
2333
2334     if (vmx != -1) {
2335         /* Only override when we know what the host supports */
2336         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2337         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2338     }
2339     if (dfp != -1) {
2340         /* Only override when we know what the host supports */
2341         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2342     }
2343
2344     if (dcache_size != -1) {
2345         pcc->l1_dcache_size = dcache_size;
2346     }
2347
2348     if (icache_size != -1) {
2349         pcc->l1_icache_size = icache_size;
2350     }
2351
2352     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2353     dc->cannot_destroy_with_object_finalize_yet = true;
2354 }
2355
2356 bool kvmppc_has_cap_epr(void)
2357 {
2358     return cap_epr;
2359 }
2360
2361 bool kvmppc_has_cap_htab_fd(void)
2362 {
2363     return cap_htab_fd;
2364 }
2365
2366 bool kvmppc_has_cap_fixup_hcalls(void)
2367 {
2368     return cap_fixup_hcalls;
2369 }
2370
2371 bool kvmppc_has_cap_htm(void)
2372 {
2373     return cap_htm;
2374 }
2375
2376 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2377 {
2378     ObjectClass *oc = OBJECT_CLASS(pcc);
2379
2380     while (oc && !object_class_is_abstract(oc)) {
2381         oc = object_class_get_parent(oc);
2382     }
2383     assert(oc);
2384
2385     return POWERPC_CPU_CLASS(oc);
2386 }
2387
2388 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2389 {
2390     uint32_t host_pvr = mfpvr();
2391     PowerPCCPUClass *pvr_pcc;
2392
2393     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2394     if (pvr_pcc == NULL) {
2395         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2396     }
2397
2398     return pvr_pcc;
2399 }
2400
2401 static int kvm_ppc_register_host_cpu_type(void)
2402 {
2403     TypeInfo type_info = {
2404         .name = TYPE_HOST_POWERPC_CPU,
2405         .instance_init = kvmppc_host_cpu_initfn,
2406         .class_init = kvmppc_host_cpu_class_init,
2407     };
2408     PowerPCCPUClass *pvr_pcc;
2409     DeviceClass *dc;
2410
2411     pvr_pcc = kvm_ppc_get_host_cpu_class();
2412     if (pvr_pcc == NULL) {
2413         return -1;
2414     }
2415     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2416     type_register(&type_info);
2417
2418     /* Register generic family CPU class for a family */
2419     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2420     dc = DEVICE_CLASS(pvr_pcc);
2421     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2422     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2423     type_register(&type_info);
2424
2425 #if defined(TARGET_PPC64)
2426     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2427     type_info.parent = TYPE_SPAPR_CPU_CORE,
2428     type_info.instance_size = sizeof(sPAPRCPUCore);
2429     type_info.instance_init = NULL;
2430     type_info.class_init = spapr_cpu_core_class_init;
2431     type_info.class_data = (void *) "host";
2432     type_register(&type_info);
2433     g_free((void *)type_info.name);
2434
2435     /* Register generic spapr CPU family class for current host CPU type */
2436     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2437     type_info.class_data = (void *) dc->desc;
2438     type_register(&type_info);
2439     g_free((void *)type_info.name);
2440 #endif
2441
2442     return 0;
2443 }
2444
2445 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2446 {
2447     struct kvm_rtas_token_args args = {
2448         .token = token,
2449     };
2450
2451     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2452         return -ENOENT;
2453     }
2454
2455     strncpy(args.name, function, sizeof(args.name));
2456
2457     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2458 }
2459
2460 int kvmppc_get_htab_fd(bool write)
2461 {
2462     struct kvm_get_htab_fd s = {
2463         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2464         .start_index = 0,
2465     };
2466
2467     if (!cap_htab_fd) {
2468         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2469         return -1;
2470     }
2471
2472     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2473 }
2474
2475 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2476 {
2477     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2478     uint8_t buf[bufsize];
2479     ssize_t rc;
2480
2481     do {
2482         rc = read(fd, buf, bufsize);
2483         if (rc < 0) {
2484             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2485                     strerror(errno));
2486             return rc;
2487         } else if (rc) {
2488             uint8_t *buffer = buf;
2489             ssize_t n = rc;
2490             while (n) {
2491                 struct kvm_get_htab_header *head =
2492                     (struct kvm_get_htab_header *) buffer;
2493                 size_t chunksize = sizeof(*head) +
2494                      HASH_PTE_SIZE_64 * head->n_valid;
2495
2496                 qemu_put_be32(f, head->index);
2497                 qemu_put_be16(f, head->n_valid);
2498                 qemu_put_be16(f, head->n_invalid);
2499                 qemu_put_buffer(f, (void *)(head + 1),
2500                                 HASH_PTE_SIZE_64 * head->n_valid);
2501
2502                 buffer += chunksize;
2503                 n -= chunksize;
2504             }
2505         }
2506     } while ((rc != 0)
2507              && ((max_ns < 0)
2508                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2509
2510     return (rc == 0) ? 1 : 0;
2511 }
2512
2513 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2514                            uint16_t n_valid, uint16_t n_invalid)
2515 {
2516     struct kvm_get_htab_header *buf;
2517     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2518     ssize_t rc;
2519
2520     buf = alloca(chunksize);
2521     buf->index = index;
2522     buf->n_valid = n_valid;
2523     buf->n_invalid = n_invalid;
2524
2525     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2526
2527     rc = write(fd, buf, chunksize);
2528     if (rc < 0) {
2529         fprintf(stderr, "Error writing KVM hash table: %s\n",
2530                 strerror(errno));
2531         return rc;
2532     }
2533     if (rc != chunksize) {
2534         /* We should never get a short write on a single chunk */
2535         fprintf(stderr, "Short write, restoring KVM hash table\n");
2536         return -1;
2537     }
2538     return 0;
2539 }
2540
2541 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2542 {
2543     return true;
2544 }
2545
2546 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2547 {
2548     return 1;
2549 }
2550
/* Always returns 1; neither argument is consulted. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
2555
2556 void kvm_arch_init_irq_routing(KVMState *s)
2557 {
2558 }
2559
/*
 * Buffer layout used when exchanging hash PTEs with the KVM htab
 * descriptor: a chunk header immediately followed by the PTE words.
 */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * Room for HPTES_PER_GROUP * 2 words plus one spare element.
     * The original note says one extra byte is required for read;
     * NOTE(review): the slack is actually a whole target_ulong --
     * confirm why the read needs it.
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
2567
2568 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2569 {
2570     int htab_fd;
2571     struct kvm_get_htab_fd ghf;
2572     struct kvm_get_htab_buf  *hpte_buf;
2573
2574     ghf.flags = 0;
2575     ghf.start_index = pte_index;
2576     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2577     if (htab_fd < 0) {
2578         goto error_out;
2579     }
2580
2581     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2582     /*
2583      * Read the hpte group
2584      */
2585     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2586         goto out_close;
2587     }
2588
2589     close(htab_fd);
2590     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2591
2592 out_close:
2593     g_free(hpte_buf);
2594     close(htab_fd);
2595 error_out:
2596     return 0;
2597 }
2598
2599 void kvmppc_hash64_free_pteg(uint64_t token)
2600 {
2601     struct kvm_get_htab_buf *htab_buf;
2602
2603     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2604                             hpte);
2605     g_free(htab_buf);
2606     return;
2607 }
2608
2609 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2610                              target_ulong pte0, target_ulong pte1)
2611 {
2612     int htab_fd;
2613     struct kvm_get_htab_fd ghf;
2614     struct kvm_get_htab_buf hpte_buf;
2615
2616     ghf.flags = 0;
2617     ghf.start_index = 0;     /* Ignored */
2618     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2619     if (htab_fd < 0) {
2620         goto error_out;
2621     }
2622
2623     hpte_buf.header.n_valid = 1;
2624     hpte_buf.header.n_invalid = 0;
2625     hpte_buf.header.index = pte_index;
2626     hpte_buf.hpte[0] = pte0;
2627     hpte_buf.hpte[1] = pte1;
2628     /*
2629      * Write the hpte entry.
2630      * CAUTION: write() has the warn_unused_result attribute. Hence we
2631      * need to check the return value, even though we do nothing.
2632      */
2633     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2634         goto out_close;
2635     }
2636
2637 out_close:
2638     close(htab_fd);
2639     return;
2640
2641 error_out:
2642     return;
2643 }
2644
2645 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2646                              uint64_t address, uint32_t data, PCIDevice *dev)
2647 {
2648     return 0;
2649 }
2650
2651 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2652                                 int vector, PCIDevice *dev)
2653 {
2654     return 0;
2655 }
2656
/* Always returns 0; @virq is not consulted. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
2661
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2666
2667 int kvmppc_enable_hwrng(void)
2668 {
2669     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2670         return -1;
2671     }
2672
2673     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2674 }