arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2020
   6  *
   7  *    Author(s): Carsten Otte <[email protected]>
   8  *               Christian Borntraeger <[email protected]>
   9  *               Heiko Carstens <[email protected]>
  10  *               Christian Ehrhardt <[email protected]>
  11  *               Jason J. Herne <[email protected]>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34 #include <linux/pgtable.h>
  35
  36 #include <asm/asm-offsets.h>
  37 #include <asm/lowcore.h>
  38 #include <asm/stp.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include <asm/uv.h>
  48 #include <asm/fpu/api.h>
  49 #include "kvm-s390.h"
  50 #include "gaccess.h"
  51
  52 #define CREATE_TRACE_POINTS
  53 #include "trace.h"
  54 #include "trace-s390.h"
  55
  56 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  57 #define LOCAL_IRQS 32
  58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  59                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  60
  61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
  62         KVM_GENERIC_VM_STATS(),
  63         STATS_DESC_COUNTER(VM, inject_io),
  64         STATS_DESC_COUNTER(VM, inject_float_mchk),
  65         STATS_DESC_COUNTER(VM, inject_pfault_done),
  66         STATS_DESC_COUNTER(VM, inject_service_signal),
  67         STATS_DESC_COUNTER(VM, inject_virtio)
  68 };
  69
  70 const struct kvm_stats_header kvm_vm_stats_header = {
  71         .name_size = KVM_STATS_NAME_SIZE,
  72         .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
  73         .id_offset = sizeof(struct kvm_stats_header),
  74         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
  75         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
  76                        sizeof(kvm_vm_stats_desc),
  77 };
  78
  79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
  80         KVM_GENERIC_VCPU_STATS(),
  81         STATS_DESC_COUNTER(VCPU, exit_userspace),
  82         STATS_DESC_COUNTER(VCPU, exit_null),
  83         STATS_DESC_COUNTER(VCPU, exit_external_request),
  84         STATS_DESC_COUNTER(VCPU, exit_io_request),
  85         STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
  86         STATS_DESC_COUNTER(VCPU, exit_stop_request),
  87         STATS_DESC_COUNTER(VCPU, exit_validity),
  88         STATS_DESC_COUNTER(VCPU, exit_instruction),
  89         STATS_DESC_COUNTER(VCPU, exit_pei),
  90         STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
  91         STATS_DESC_COUNTER(VCPU, instruction_lctl),
  92         STATS_DESC_COUNTER(VCPU, instruction_lctlg),
  93         STATS_DESC_COUNTER(VCPU, instruction_stctl),
  94         STATS_DESC_COUNTER(VCPU, instruction_stctg),
  95         STATS_DESC_COUNTER(VCPU, exit_program_interruption),
  96         STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
  97         STATS_DESC_COUNTER(VCPU, exit_operation_exception),
  98         STATS_DESC_COUNTER(VCPU, deliver_ckc),
  99         STATS_DESC_COUNTER(VCPU, deliver_cputm),
 100         STATS_DESC_COUNTER(VCPU, deliver_external_call),
 101         STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
 102         STATS_DESC_COUNTER(VCPU, deliver_service_signal),
 103         STATS_DESC_COUNTER(VCPU, deliver_virtio),
 104         STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
 105         STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
 106         STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
 107         STATS_DESC_COUNTER(VCPU, deliver_program),
 108         STATS_DESC_COUNTER(VCPU, deliver_io),
 109         STATS_DESC_COUNTER(VCPU, deliver_machine_check),
 110         STATS_DESC_COUNTER(VCPU, exit_wait_state),
 111         STATS_DESC_COUNTER(VCPU, inject_ckc),
 112         STATS_DESC_COUNTER(VCPU, inject_cputm),
 113         STATS_DESC_COUNTER(VCPU, inject_external_call),
 114         STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
 115         STATS_DESC_COUNTER(VCPU, inject_mchk),
 116         STATS_DESC_COUNTER(VCPU, inject_pfault_init),
 117         STATS_DESC_COUNTER(VCPU, inject_program),
 118         STATS_DESC_COUNTER(VCPU, inject_restart),
 119         STATS_DESC_COUNTER(VCPU, inject_set_prefix),
 120         STATS_DESC_COUNTER(VCPU, inject_stop_signal),
 121         STATS_DESC_COUNTER(VCPU, instruction_epsw),
 122         STATS_DESC_COUNTER(VCPU, instruction_gs),
 123         STATS_DESC_COUNTER(VCPU, instruction_io_other),
 124         STATS_DESC_COUNTER(VCPU, instruction_lpsw),
 125         STATS_DESC_COUNTER(VCPU, instruction_lpswe),
 126         STATS_DESC_COUNTER(VCPU, instruction_pfmf),
 127         STATS_DESC_COUNTER(VCPU, instruction_ptff),
 128         STATS_DESC_COUNTER(VCPU, instruction_sck),
 129         STATS_DESC_COUNTER(VCPU, instruction_sckpf),
 130         STATS_DESC_COUNTER(VCPU, instruction_stidp),
 131         STATS_DESC_COUNTER(VCPU, instruction_spx),
 132         STATS_DESC_COUNTER(VCPU, instruction_stpx),
 133         STATS_DESC_COUNTER(VCPU, instruction_stap),
 134         STATS_DESC_COUNTER(VCPU, instruction_iske),
 135         STATS_DESC_COUNTER(VCPU, instruction_ri),
 136         STATS_DESC_COUNTER(VCPU, instruction_rrbe),
 137         STATS_DESC_COUNTER(VCPU, instruction_sske),
 138         STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
 139         STATS_DESC_COUNTER(VCPU, instruction_stsi),
 140         STATS_DESC_COUNTER(VCPU, instruction_stfl),
 141         STATS_DESC_COUNTER(VCPU, instruction_tb),
 142         STATS_DESC_COUNTER(VCPU, instruction_tpi),
 143         STATS_DESC_COUNTER(VCPU, instruction_tprot),
 144         STATS_DESC_COUNTER(VCPU, instruction_tsch),
 145         STATS_DESC_COUNTER(VCPU, instruction_sie),
 146         STATS_DESC_COUNTER(VCPU, instruction_essa),
 147         STATS_DESC_COUNTER(VCPU, instruction_sthyi),
 148         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
 149         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
 150         STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
 151         STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
 152         STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
 153         STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
 154         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
 155         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
 156         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
 157         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
 158         STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
 159         STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
 160         STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
 161         STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
 162         STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
 163         STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
 164         STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
 165         STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
 166         STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
 167         STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
 168         STATS_DESC_COUNTER(VCPU, diag_9c_forward),
 169         STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
 170         STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
 171         STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
 172         STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
 173         STATS_DESC_COUNTER(VCPU, pfault_sync)
 174 };
 175
 176 const struct kvm_stats_header kvm_vcpu_stats_header = {
 177         .name_size = KVM_STATS_NAME_SIZE,
 178         .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
 179         .id_offset = sizeof(struct kvm_stats_header),
 180         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
 181         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
 182                        sizeof(kvm_vcpu_stats_desc),
 183 };
 184
 185 /* allow nested virtualization in KVM (if enabled by user space) */
 186 static int nested;
 187 module_param(nested, int, S_IRUGO);
 188 MODULE_PARM_DESC(nested, "Nested virtualization support");
 189
 190 /* allow 1m huge page guest backing, if !nested */
 191 static int hpage;
 192 module_param(hpage, int, 0444);
 193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 194
 195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
 196 static u8 halt_poll_max_steal = 10;
 197 module_param(halt_poll_max_steal, byte, 0644);
 198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 199
 200 /* if set to true, the GISA will be initialized and used if available */
 201 static bool use_gisa  = true;
 202 module_param(use_gisa, bool, 0644);
 203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
 204
 205 /* maximum diag9c forwarding per second */
 206 unsigned int diag9c_forwarding_hz;
 207 module_param(diag9c_forwarding_hz, uint, 0644);
 208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 209
 210 /*
 211  * For now we handle at most 16 double words as this is what the s390 base
 212  * kernel handles and stores in the prefix page. If we ever need to go beyond
 213  * this, this requires changes to code, but the external uapi can stay.
 214  */
 215 #define SIZE_INTERNAL 16
 216
 217 /*
 218  * Base feature mask that defines default mask for facilities. Consists of the
 219  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 220  */
 221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 222 /*
 223  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 224  * and defines the facilities that can be enabled via a cpu model.
 225  */
 226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 227
 228 static unsigned long kvm_s390_fac_size(void)
 229 {
 230         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 231         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 232         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 233                 sizeof(stfle_fac_list));
 234
 235         return SIZE_INTERNAL;
 236 }
 237
 238 /* available cpu features supported by kvm */
 239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 240 /* available subfunctions indicated via query / "test bit" */
 241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 242
 243 static struct gmap_notifier gmap_notifier;
 244 static struct gmap_notifier vsie_gmap_notifier;
 245 debug_info_t *kvm_s390_dbf;
 246 debug_info_t *kvm_s390_dbf_uv;
 247
 248 /* Section: not file related */
 249 int kvm_arch_hardware_enable(void)
 250 {
 251         /* every s390 is virtualization enabled ;-) */
 252         return 0;
 253 }
 254
 255 int kvm_arch_check_processor_compat(void *opaque)
 256 {
 257         return 0;
 258 }
 259
 260 /* forward declarations */
 261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 262                               unsigned long end);
 263 static int sca_switch_to_extended(struct kvm *kvm);
 264
 265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 266 {
 267         u8 delta_idx = 0;
 268
 269         /*
 270          * The TOD jumps by delta, we have to compensate this by adding
 271          * -delta to the epoch.
 272          */
 273         delta = -delta;
 274
 275         /* sign-extension - we're adding to signed values below */
 276         if ((s64)delta < 0)
 277                 delta_idx = -1;
 278
 279         scb->epoch += delta;
 280         if (scb->ecd & ECD_MEF) {
 281                 scb->epdx += delta_idx;
 282                 if (scb->epoch < delta)
 283                         scb->epdx += 1;
 284         }
 285 }
 286
 287 /*
 288  * This callback is executed during stop_machine(). All CPUs are therefore
 289  * temporarily stopped. In order not to change guest behavior, we have to
 290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 291  * so a CPU won't be stopped while calculating with the epoch.
 292  */
 293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 294                           void *v)
 295 {
 296         struct kvm *kvm;
 297         struct kvm_vcpu *vcpu;
 298         int i;
 299         unsigned long long *delta = v;
 300
 301         list_for_each_entry(kvm, &vm_list, vm_list) {
 302                 kvm_for_each_vcpu(i, vcpu, kvm) {
 303                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 304                         if (i == 0) {
 305                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 306                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 307                         }
 308                         if (vcpu->arch.cputm_enabled)
 309                                 vcpu->arch.cputm_start += *delta;
 310                         if (vcpu->arch.vsie_block)
 311                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 312                                                    *delta);
 313                 }
 314         }
 315         return NOTIFY_OK;
 316 }
 317
 318 static struct notifier_block kvm_clock_notifier = {
 319         .notifier_call = kvm_clock_sync,
 320 };
 321
 322 int kvm_arch_hardware_setup(void *opaque)
 323 {
 324         gmap_notifier.notifier_call = kvm_gmap_notifier;
 325         gmap_register_pte_notifier(&gmap_notifier);
 326         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 327         gmap_register_pte_notifier(&vsie_gmap_notifier);
 328         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 329                                        &kvm_clock_notifier);
 330         return 0;
 331 }
 332
 333 void kvm_arch_hardware_unsetup(void)
 334 {
 335         gmap_unregister_pte_notifier(&gmap_notifier);
 336         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 337         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 338                                          &kvm_clock_notifier);
 339 }
 340
 341 static void allow_cpu_feat(unsigned long nr)
 342 {
 343         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 344 }
 345
 346 static inline int plo_test_bit(unsigned char nr)
 347 {
 348         unsigned long function = (unsigned long)nr | 0x100;
 349         int cc;
 350
 351         asm volatile(
 352                 "       lgr     0,%[function]\n"
 353                 /* Parameter registers are ignored for "test bit" */
 354                 "       plo     0,0,0,0(0)\n"
 355                 "       ipm     %0\n"
 356                 "       srl     %0,28\n"
 357                 : "=d" (cc)
 358                 : [function] "d" (function)
 359                 : "cc", "0");
 360         return cc == 0;
 361 }
 362
 363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 364 {
 365         asm volatile(
 366                 "       lghi    0,0\n"
 367                 "       lgr     1,%[query]\n"
 368                 /* Parameter registers are ignored */
 369                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 370                 :
 371                 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
 372                 : "cc", "memory", "0", "1");
 373 }
 374
 375 #define INSN_SORTL 0xb938
 376 #define INSN_DFLTCC 0xb939
 377
 378 static void kvm_s390_cpu_feat_init(void)
 379 {
 380         int i;
 381
 382         for (i = 0; i < 256; ++i) {
 383                 if (plo_test_bit(i))
 384                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 385         }
 386
 387         if (test_facility(28)) /* TOD-clock steering */
 388                 ptff(kvm_s390_available_subfunc.ptff,
 389                      sizeof(kvm_s390_available_subfunc.ptff),
 390                      PTFF_QAF);
 391
 392         if (test_facility(17)) { /* MSA */
 393                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 394                               kvm_s390_available_subfunc.kmac);
 395                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 396                               kvm_s390_available_subfunc.kmc);
 397                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 398                               kvm_s390_available_subfunc.km);
 399                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 400                               kvm_s390_available_subfunc.kimd);
 401                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 402                               kvm_s390_available_subfunc.klmd);
 403         }
 404         if (test_facility(76)) /* MSA3 */
 405                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 406                               kvm_s390_available_subfunc.pckmo);
 407         if (test_facility(77)) { /* MSA4 */
 408                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 409                               kvm_s390_available_subfunc.kmctr);
 410                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 411                               kvm_s390_available_subfunc.kmf);
 412                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 413                               kvm_s390_available_subfunc.kmo);
 414                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 415                               kvm_s390_available_subfunc.pcc);
 416         }
 417         if (test_facility(57)) /* MSA5 */
 418                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 419                               kvm_s390_available_subfunc.ppno);
 420
 421         if (test_facility(146)) /* MSA8 */
 422                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 423                               kvm_s390_available_subfunc.kma);
 424
 425         if (test_facility(155)) /* MSA9 */
 426                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 427                               kvm_s390_available_subfunc.kdsa);
 428
 429         if (test_facility(150)) /* SORTL */
 430                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 431
 432         if (test_facility(151)) /* DFLTCC */
 433                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 434
 435         if (MACHINE_HAS_ESOP)
 436                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 437         /*
 438          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 439          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 440          */
 441         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 442             !test_facility(3) || !nested)
 443                 return;
 444         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 445         if (sclp.has_64bscao)
 446                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 447         if (sclp.has_siif)
 448                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 449         if (sclp.has_gpere)
 450                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 451         if (sclp.has_gsls)
 452                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 453         if (sclp.has_ib)
 454                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 455         if (sclp.has_cei)
 456                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 457         if (sclp.has_ibs)
 458                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 459         if (sclp.has_kss)
 460                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 461         /*
 462          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 463          * all skey handling functions read/set the skey from the PGSTE
 464          * instead of the real storage key.
 465          *
 466          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 467          * pages being detected as preserved although they are resident.
 468          *
 469          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 470          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 471          *
 472          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 473          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 474          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 475          *
 476          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 477          * cannot easily shadow the SCA because of the ipte lock.
 478          */
 479 }
 480
 481 int kvm_arch_init(void *opaque)
 482 {
 483         int rc = -ENOMEM;
 484
 485         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 486         if (!kvm_s390_dbf)
 487                 return -ENOMEM;
 488
 489         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 490         if (!kvm_s390_dbf_uv)
 491                 goto out;
 492
 493         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 494             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 495                 goto out;
 496
 497         kvm_s390_cpu_feat_init();
 498
 499         /* Register floating interrupt controller interface. */
 500         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 501         if (rc) {
 502                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 503                 goto out;
 504         }
 505
 506         rc = kvm_s390_gib_init(GAL_ISC);
 507         if (rc)
 508                 goto out;
 509
 510         return 0;
 511
 512 out:
 513         kvm_arch_exit();
 514         return rc;
 515 }
 516
 517 void kvm_arch_exit(void)
 518 {
 519         kvm_s390_gib_destroy();
 520         debug_unregister(kvm_s390_dbf);
 521         debug_unregister(kvm_s390_dbf_uv);
 522 }
 523
 524 /* Section: device related */
 525 long kvm_arch_dev_ioctl(struct file *filp,
 526                         unsigned int ioctl, unsigned long arg)
 527 {
 528         if (ioctl == KVM_S390_ENABLE_SIE)
 529                 return s390_enable_sie();
 530         return -EINVAL;
 531 }
 532
 533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 534 {
 535         int r;
 536
 537         switch (ext) {
 538         case KVM_CAP_S390_PSW:
 539         case KVM_CAP_S390_GMAP:
 540         case KVM_CAP_SYNC_MMU:
 541 #ifdef CONFIG_KVM_S390_UCONTROL
 542         case KVM_CAP_S390_UCONTROL:
 543 #endif
 544         case KVM_CAP_ASYNC_PF:
 545         case KVM_CAP_SYNC_REGS:
 546         case KVM_CAP_ONE_REG:
 547         case KVM_CAP_ENABLE_CAP:
 548         case KVM_CAP_S390_CSS_SUPPORT:
 549         case KVM_CAP_IOEVENTFD:
 550         case KVM_CAP_DEVICE_CTRL:
 551         case KVM_CAP_S390_IRQCHIP:
 552         case KVM_CAP_VM_ATTRIBUTES:
 553         case KVM_CAP_MP_STATE:
 554         case KVM_CAP_IMMEDIATE_EXIT:
 555         case KVM_CAP_S390_INJECT_IRQ:
 556         case KVM_CAP_S390_USER_SIGP:
 557         case KVM_CAP_S390_USER_STSI:
 558         case KVM_CAP_S390_SKEYS:
 559         case KVM_CAP_S390_IRQ_STATE:
 560         case KVM_CAP_S390_USER_INSTR0:
 561         case KVM_CAP_S390_CMMA_MIGRATION:
 562         case KVM_CAP_S390_AIS:
 563         case KVM_CAP_S390_AIS_MIGRATION:
 564         case KVM_CAP_S390_VCPU_RESETS:
 565         case KVM_CAP_SET_GUEST_DEBUG:
 566         case KVM_CAP_S390_DIAG318:
 567                 r = 1;
 568                 break;
 569         case KVM_CAP_SET_GUEST_DEBUG2:
 570                 r = KVM_GUESTDBG_VALID_MASK;
 571                 break;
 572         case KVM_CAP_S390_HPAGE_1M:
 573                 r = 0;
 574                 if (hpage && !kvm_is_ucontrol(kvm))
 575                         r = 1;
 576                 break;
 577         case KVM_CAP_S390_MEM_OP:
 578                 r = MEM_OP_MAX_SIZE;
 579                 break;
 580         case KVM_CAP_NR_VCPUS:
 581         case KVM_CAP_MAX_VCPUS:
 582         case KVM_CAP_MAX_VCPU_ID:
 583                 r = KVM_S390_BSCA_CPU_SLOTS;
 584                 if (!kvm_s390_use_sca_entries())
 585                         r = KVM_MAX_VCPUS;
 586                 else if (sclp.has_esca && sclp.has_64bscao)
 587                         r = KVM_S390_ESCA_CPU_SLOTS;
 588                 if (ext == KVM_CAP_NR_VCPUS)
 589                         r = min_t(unsigned int, num_online_cpus(), r);
 590                 break;
 591         case KVM_CAP_S390_COW:
 592                 r = MACHINE_HAS_ESOP;
 593                 break;
 594         case KVM_CAP_S390_VECTOR_REGISTERS:
 595                 r = MACHINE_HAS_VX;
 596                 break;
 597         case KVM_CAP_S390_RI:
 598                 r = test_facility(64);
 599                 break;
 600         case KVM_CAP_S390_GS:
 601                 r = test_facility(133);
 602                 break;
 603         case KVM_CAP_S390_BPB:
 604                 r = test_facility(82);
 605                 break;
 606         case KVM_CAP_S390_PROTECTED:
 607                 r = is_prot_virt_host();
 608                 break;
 609         default:
 610                 r = 0;
 611         }
 612         return r;
 613 }
 614
 615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 616 {
 617         int i;
 618         gfn_t cur_gfn, last_gfn;
 619         unsigned long gaddr, vmaddr;
 620         struct gmap *gmap = kvm->arch.gmap;
 621         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 622
 623         /* Loop over all guest segments */
 624         cur_gfn = memslot->base_gfn;
 625         last_gfn = memslot->base_gfn + memslot->npages;
 626         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 627                 gaddr = gfn_to_gpa(cur_gfn);
 628                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 629                 if (kvm_is_error_hva(vmaddr))
 630                         continue;
 631
 632                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 633                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 634                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 635                         if (test_bit(i, bitmap))
 636                                 mark_page_dirty(kvm, cur_gfn + i);
 637                 }
 638
 639                 if (fatal_signal_pending(current))
 640                         return;
 641                 cond_resched();
 642         }
 643 }
 644
 645 /* Section: vm related */
 646 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 647
 648 /*
 649  * Get (and clear) the dirty memory log for a memory slot.
 650  */
 651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 652                                struct kvm_dirty_log *log)
 653 {
 654         int r;
 655         unsigned long n;
 656         struct kvm_memory_slot *memslot;
 657         int is_dirty;
 658
 659         if (kvm_is_ucontrol(kvm))
 660                 return -EINVAL;
 661
 662         mutex_lock(&kvm->slots_lock);
 663
 664         r = -EINVAL;
 665         if (log->slot >= KVM_USER_MEM_SLOTS)
 666                 goto out;
 667
 668         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 669         if (r)
 670                 goto out;
 671
 672         /* Clear the dirty log */
 673         if (is_dirty) {
 674                 n = kvm_dirty_bitmap_bytes(memslot);
 675                 memset(memslot->dirty_bitmap, 0, n);
 676         }
 677         r = 0;
 678 out:
 679         mutex_unlock(&kvm->slots_lock);
 680         return r;
 681 }
 682
 683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 684 {
 685         unsigned int i;
 686         struct kvm_vcpu *vcpu;
 687
 688         kvm_for_each_vcpu(i, vcpu, kvm) {
 689                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 690         }
 691 }
 692
 693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 694 {
 695         int r;
 696
 697         if (cap->flags)
 698                 return -EINVAL;
 699
 700         switch (cap->cap) {
 701         case KVM_CAP_S390_IRQCHIP:
 702                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 703                 kvm->arch.use_irqchip = 1;
 704                 r = 0;
 705                 break;
 706         case KVM_CAP_S390_USER_SIGP:
 707                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 708                 kvm->arch.user_sigp = 1;
 709                 r = 0;
 710                 break;
 711         case KVM_CAP_S390_VECTOR_REGISTERS:
 712                 mutex_lock(&kvm->lock);
 713                 if (kvm->created_vcpus) {
 714                         r = -EBUSY;
 715                 } else if (MACHINE_HAS_VX) {
 716                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 717                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 718                         if (test_facility(134)) {
 719                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 720                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 721                         }
 722                         if (test_facility(135)) {
 723                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 724                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 725                         }
 726                         if (test_facility(148)) {
 727                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 728                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 729                         }
 730                         if (test_facility(152)) {
 731                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 732                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 733                         }
 734                         if (test_facility(192)) {
 735                                 set_kvm_facility(kvm->arch.model.fac_mask, 192);
 736                                 set_kvm_facility(kvm->arch.model.fac_list, 192);
 737                         }
 738                         r = 0;
 739                 } else
 740                         r = -EINVAL;
 741                 mutex_unlock(&kvm->lock);
 742                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 743                          r ? "(not available)" : "(success)");
 744                 break;
 745         case KVM_CAP_S390_RI:
 746                 r = -EINVAL;
 747                 mutex_lock(&kvm->lock);
 748                 if (kvm->created_vcpus) {
 749                         r = -EBUSY;
 750                 } else if (test_facility(64)) {
 751                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 752                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 753                         r = 0;
 754                 }
 755                 mutex_unlock(&kvm->lock);
 756                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 757                          r ? "(not available)" : "(success)");
 758                 break;
 759         case KVM_CAP_S390_AIS:
 760                 mutex_lock(&kvm->lock);
 761                 if (kvm->created_vcpus) {
 762                         r = -EBUSY;
 763                 } else {
 764                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 765                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 766                         r = 0;
 767                 }
 768                 mutex_unlock(&kvm->lock);
 769                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 770                          r ? "(not available)" : "(success)");
 771                 break;
 772         case KVM_CAP_S390_GS:
 773                 r = -EINVAL;
 774                 mutex_lock(&kvm->lock);
 775                 if (kvm->created_vcpus) {
 776                         r = -EBUSY;
 777                 } else if (test_facility(133)) {
 778                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 779                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 780                         r = 0;
 781                 }
 782                 mutex_unlock(&kvm->lock);
 783                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 784                          r ? "(not available)" : "(success)");
 785                 break;
 786         case KVM_CAP_S390_HPAGE_1M:
 787                 mutex_lock(&kvm->lock);
 788                 if (kvm->created_vcpus)
 789                         r = -EBUSY;
 790                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 791                         r = -EINVAL;
 792                 else {
 793                         r = 0;
 794                         mmap_write_lock(kvm->mm);
 795                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 796                         mmap_write_unlock(kvm->mm);
 797                         /*
 798                          * We might have to create fake 4k page
 799                          * tables. To avoid that the hardware works on
 800                          * stale PGSTEs, we emulate these instructions.
 801                          */
 802                         kvm->arch.use_skf = 0;
 803                         kvm->arch.use_pfmfi = 0;
 804                 }
 805                 mutex_unlock(&kvm->lock);
 806                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 807                          r ? "(not available)" : "(success)");
 808                 break;
 809         case KVM_CAP_S390_USER_STSI:
 810                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 811                 kvm->arch.user_stsi = 1;
 812                 r = 0;
 813                 break;
 814         case KVM_CAP_S390_USER_INSTR0:
 815                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 816                 kvm->arch.user_instr0 = 1;
 817                 icpt_operexc_on_all_vcpus(kvm);
 818                 r = 0;
 819                 break;
 820         default:
 821                 r = -EINVAL;
 822                 break;
 823         }
 824         return r;
 825 }
 826
 827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 828 {
 829         int ret;
 830
 831         switch (attr->attr) {
 832         case KVM_S390_VM_MEM_LIMIT_SIZE:
 833                 ret = 0;
 834                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 835                          kvm->arch.mem_limit);
 836                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 837                         ret = -EFAULT;
 838                 break;
 839         default:
 840                 ret = -ENXIO;
 841                 break;
 842         }
 843         return ret;
 844 }
 845
 846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 847 {
 848         int ret;
 849         unsigned int idx;
 850         switch (attr->attr) {
 851         case KVM_S390_VM_MEM_ENABLE_CMMA:
 852                 ret = -ENXIO;
 853                 if (!sclp.has_cmma)
 854                         break;
 855
 856                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 857                 mutex_lock(&kvm->lock);
 858                 if (kvm->created_vcpus)
 859                         ret = -EBUSY;
 860                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 861                         ret = -EINVAL;
 862                 else {
 863                         kvm->arch.use_cmma = 1;
 864                         /* Not compatible with cmma. */
 865                         kvm->arch.use_pfmfi = 0;
 866                         ret = 0;
 867                 }
 868                 mutex_unlock(&kvm->lock);
 869                 break;
 870         case KVM_S390_VM_MEM_CLR_CMMA:
 871                 ret = -ENXIO;
 872                 if (!sclp.has_cmma)
 873                         break;
 874                 ret = -EINVAL;
 875                 if (!kvm->arch.use_cmma)
 876                         break;
 877
 878                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 879                 mutex_lock(&kvm->lock);
 880                 idx = srcu_read_lock(&kvm->srcu);
 881                 s390_reset_cmma(kvm->arch.gmap->mm);
 882                 srcu_read_unlock(&kvm->srcu, idx);
 883                 mutex_unlock(&kvm->lock);
 884                 ret = 0;
 885                 break;
 886         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 887                 unsigned long new_limit;
 888
 889                 if (kvm_is_ucontrol(kvm))
 890                         return -EINVAL;
 891
 892                 if (get_user(new_limit, (u64 __user *)attr->addr))
 893                         return -EFAULT;
 894
 895                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 896                     new_limit > kvm->arch.mem_limit)
 897                         return -E2BIG;
 898
 899                 if (!new_limit)
 900                         return -EINVAL;
 901
 902                 /* gmap_create takes last usable address */
 903                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 904                         new_limit -= 1;
 905
 906                 ret = -EBUSY;
 907                 mutex_lock(&kvm->lock);
 908                 if (!kvm->created_vcpus) {
 909                         /* gmap_create will round the limit up */
 910                         struct gmap *new = gmap_create(current->mm, new_limit);
 911
 912                         if (!new) {
 913                                 ret = -ENOMEM;
 914                         } else {
 915                                 gmap_remove(kvm->arch.gmap);
 916                                 new->private = kvm;
 917                                 kvm->arch.gmap = new;
 918                                 ret = 0;
 919                         }
 920                 }
 921                 mutex_unlock(&kvm->lock);
 922                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 923                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 924                          (void *) kvm->arch.gmap->asce);
 925                 break;
 926         }
 927         default:
 928                 ret = -ENXIO;
 929                 break;
 930         }
 931         return ret;
 932 }
 933
 934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 935
 936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 937 {
 938         struct kvm_vcpu *vcpu;
 939         int i;
 940
 941         kvm_s390_vcpu_block_all(kvm);
 942
 943         kvm_for_each_vcpu(i, vcpu, kvm) {
 944                 kvm_s390_vcpu_crypto_setup(vcpu);
 945                 /* recreate the shadow crycb by leaving the VSIE handler */
 946                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 947         }
 948
 949         kvm_s390_vcpu_unblock_all(kvm);
 950 }
 951
 952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 953 {
 954         mutex_lock(&kvm->lock);
 955         switch (attr->attr) {
 956         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 957                 if (!test_kvm_facility(kvm, 76)) {
 958                         mutex_unlock(&kvm->lock);
 959                         return -EINVAL;
 960                 }
 961                 get_random_bytes(
 962                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 963                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 964                 kvm->arch.crypto.aes_kw = 1;
 965                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 966                 break;
 967         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 968                 if (!test_kvm_facility(kvm, 76)) {
 969                         mutex_unlock(&kvm->lock);
 970                         return -EINVAL;
 971                 }
 972                 get_random_bytes(
 973                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 974                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 975                 kvm->arch.crypto.dea_kw = 1;
 976                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 977                 break;
 978         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 979                 if (!test_kvm_facility(kvm, 76)) {
 980                         mutex_unlock(&kvm->lock);
 981                         return -EINVAL;
 982                 }
 983                 kvm->arch.crypto.aes_kw = 0;
 984                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 985                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 986                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 987                 break;
 988         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 989                 if (!test_kvm_facility(kvm, 76)) {
 990                         mutex_unlock(&kvm->lock);
 991                         return -EINVAL;
 992                 }
 993                 kvm->arch.crypto.dea_kw = 0;
 994                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 995                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 996                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 997                 break;
 998         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 999                 if (!ap_instructions_available()) {
1000                         mutex_unlock(&kvm->lock);
1001                         return -EOPNOTSUPP;
1002                 }
1003                 kvm->arch.crypto.apie = 1;
1004                 break;
1005         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006                 if (!ap_instructions_available()) {
1007                         mutex_unlock(&kvm->lock);
1008                         return -EOPNOTSUPP;
1009                 }
1010                 kvm->arch.crypto.apie = 0;
1011                 break;
1012         default:
1013                 mutex_unlock(&kvm->lock);
1014                 return -ENXIO;
1015         }
1016
1017         kvm_s390_vcpu_crypto_reset_all(kvm);
1018         mutex_unlock(&kvm->lock);
1019         return 0;
1020 }
1021
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024         int cx;
1025         struct kvm_vcpu *vcpu;
1026
1027         kvm_for_each_vcpu(cx, vcpu, kvm)
1028                 kvm_s390_sync_request(req, vcpu);
1029 }
1030
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037         struct kvm_memory_slot *ms;
1038         struct kvm_memslots *slots;
1039         unsigned long ram_pages = 0;
1040         int slotnr;
1041
1042         /* migration mode already enabled */
1043         if (kvm->arch.migration_mode)
1044                 return 0;
1045         slots = kvm_memslots(kvm);
1046         if (!slots || !slots->used_slots)
1047                 return -EINVAL;
1048
1049         if (!kvm->arch.use_cmma) {
1050                 kvm->arch.migration_mode = 1;
1051                 return 0;
1052         }
1053         /* mark all the pages in active slots as dirty */
1054         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1055                 ms = slots->memslots + slotnr;
1056                 if (!ms->dirty_bitmap)
1057                         return -EINVAL;
1058                 /*
1059                  * The second half of the bitmap is only used on x86,
1060                  * and would be wasted otherwise, so we put it to good
1061                  * use here to keep track of the state of the storage
1062                  * attributes.
1063                  */
1064                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1065                 ram_pages += ms->npages;
1066         }
1067         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1068         kvm->arch.migration_mode = 1;
1069         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1070         return 0;
1071 }
1072
1073 /*
1074  * Must be called with kvm->slots_lock to avoid races with ourselves and
1075  * kvm_s390_vm_start_migration.
1076  */
1077 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1078 {
1079         /* migration mode already disabled */
1080         if (!kvm->arch.migration_mode)
1081                 return 0;
1082         kvm->arch.migration_mode = 0;
1083         if (kvm->arch.use_cmma)
1084                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1085         return 0;
1086 }
1087
1088 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1089                                      struct kvm_device_attr *attr)
1090 {
1091         int res = -ENXIO;
1092
1093         mutex_lock(&kvm->slots_lock);
1094         switch (attr->attr) {
1095         case KVM_S390_VM_MIGRATION_START:
1096                 res = kvm_s390_vm_start_migration(kvm);
1097                 break;
1098         case KVM_S390_VM_MIGRATION_STOP:
1099                 res = kvm_s390_vm_stop_migration(kvm);
1100                 break;
1101         default:
1102                 break;
1103         }
1104         mutex_unlock(&kvm->slots_lock);
1105
1106         return res;
1107 }
1108
1109 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1110                                      struct kvm_device_attr *attr)
1111 {
1112         u64 mig = kvm->arch.migration_mode;
1113
1114         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1115                 return -ENXIO;
1116
1117         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1118                 return -EFAULT;
1119         return 0;
1120 }
1121
1122 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1123 {
1124         struct kvm_s390_vm_tod_clock gtod;
1125
1126         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1127                 return -EFAULT;
1128
1129         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1130                 return -EINVAL;
1131         kvm_s390_set_tod_clock(kvm, &gtod);
1132
1133         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1134                 gtod.epoch_idx, gtod.tod);
1135
1136         return 0;
1137 }
1138
1139 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141         u8 gtod_high;
1142
1143         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1144                                            sizeof(gtod_high)))
1145                 return -EFAULT;
1146
1147         if (gtod_high != 0)
1148                 return -EINVAL;
1149         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1150
1151         return 0;
1152 }
1153
1154 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1155 {
1156         struct kvm_s390_vm_tod_clock gtod = { 0 };
1157
1158         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1159                            sizeof(gtod.tod)))
1160                 return -EFAULT;
1161
1162         kvm_s390_set_tod_clock(kvm, &gtod);
1163         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1164         return 0;
1165 }
1166
1167 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1168 {
1169         int ret;
1170
1171         if (attr->flags)
1172                 return -EINVAL;
1173
1174         switch (attr->attr) {
1175         case KVM_S390_VM_TOD_EXT:
1176                 ret = kvm_s390_set_tod_ext(kvm, attr);
1177                 break;
1178         case KVM_S390_VM_TOD_HIGH:
1179                 ret = kvm_s390_set_tod_high(kvm, attr);
1180                 break;
1181         case KVM_S390_VM_TOD_LOW:
1182                 ret = kvm_s390_set_tod_low(kvm, attr);
1183                 break;
1184         default:
1185                 ret = -ENXIO;
1186                 break;
1187         }
1188         return ret;
1189 }
1190
1191 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1192                                    struct kvm_s390_vm_tod_clock *gtod)
1193 {
1194         union tod_clock clk;
1195
1196         preempt_disable();
1197
1198         store_tod_clock_ext(&clk);
1199
1200         gtod->tod = clk.tod + kvm->arch.epoch;
1201         gtod->epoch_idx = 0;
1202         if (test_kvm_facility(kvm, 139)) {
1203                 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1204                 if (gtod->tod < clk.tod)
1205                         gtod->epoch_idx += 1;
1206         }
1207
1208         preempt_enable();
1209 }
1210
1211 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213         struct kvm_s390_vm_tod_clock gtod;
1214
1215         memset(&gtod, 0, sizeof(gtod));
1216         kvm_s390_get_tod_clock(kvm, &gtod);
1217         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1218                 return -EFAULT;
1219
1220         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1221                 gtod.epoch_idx, gtod.tod);
1222         return 0;
1223 }
1224
1225 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1226 {
1227         u8 gtod_high = 0;
1228
1229         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1230                                          sizeof(gtod_high)))
1231                 return -EFAULT;
1232         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1233
1234         return 0;
1235 }
1236
1237 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1238 {
1239         u64 gtod;
1240
1241         gtod = kvm_s390_get_tod_clock_fast(kvm);
1242         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1243                 return -EFAULT;
1244         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1245
1246         return 0;
1247 }
1248
1249 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1250 {
1251         int ret;
1252
1253         if (attr->flags)
1254                 return -EINVAL;
1255
1256         switch (attr->attr) {
1257         case KVM_S390_VM_TOD_EXT:
1258                 ret = kvm_s390_get_tod_ext(kvm, attr);
1259                 break;
1260         case KVM_S390_VM_TOD_HIGH:
1261                 ret = kvm_s390_get_tod_high(kvm, attr);
1262                 break;
1263         case KVM_S390_VM_TOD_LOW:
1264                 ret = kvm_s390_get_tod_low(kvm, attr);
1265                 break;
1266         default:
1267                 ret = -ENXIO;
1268                 break;
1269         }
1270         return ret;
1271 }
1272
1273 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1274 {
1275         struct kvm_s390_vm_cpu_processor *proc;
1276         u16 lowest_ibc, unblocked_ibc;
1277         int ret = 0;
1278
1279         mutex_lock(&kvm->lock);
1280         if (kvm->created_vcpus) {
1281                 ret = -EBUSY;
1282                 goto out;
1283         }
1284         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1285         if (!proc) {
1286                 ret = -ENOMEM;
1287                 goto out;
1288         }
1289         if (!copy_from_user(proc, (void __user *)attr->addr,
1290                             sizeof(*proc))) {
1291                 kvm->arch.model.cpuid = proc->cpuid;
1292                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1293                 unblocked_ibc = sclp.ibc & 0xfff;
1294                 if (lowest_ibc && proc->ibc) {
1295                         if (proc->ibc > unblocked_ibc)
1296                                 kvm->arch.model.ibc = unblocked_ibc;
1297                         else if (proc->ibc < lowest_ibc)
1298                                 kvm->arch.model.ibc = lowest_ibc;
1299                         else
1300                                 kvm->arch.model.ibc = proc->ibc;
1301                 }
1302                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1303                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1304                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1305                          kvm->arch.model.ibc,
1306                          kvm->arch.model.cpuid);
1307                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1308                          kvm->arch.model.fac_list[0],
1309                          kvm->arch.model.fac_list[1],
1310                          kvm->arch.model.fac_list[2]);
1311         } else
1312                 ret = -EFAULT;
1313         kfree(proc);
1314 out:
1315         mutex_unlock(&kvm->lock);
1316         return ret;
1317 }
1318
1319 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1320                                        struct kvm_device_attr *attr)
1321 {
1322         struct kvm_s390_vm_cpu_feat data;
1323
1324         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1325                 return -EFAULT;
1326         if (!bitmap_subset((unsigned long *) data.feat,
1327                            kvm_s390_available_cpu_feat,
1328                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1329                 return -EINVAL;
1330
1331         mutex_lock(&kvm->lock);
1332         if (kvm->created_vcpus) {
1333                 mutex_unlock(&kvm->lock);
1334                 return -EBUSY;
1335         }
1336         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1337                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1338         mutex_unlock(&kvm->lock);
1339         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1340                          data.feat[0],
1341                          data.feat[1],
1342                          data.feat[2]);
1343         return 0;
1344 }
1345
1346 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1347                                           struct kvm_device_attr *attr)
1348 {
1349         mutex_lock(&kvm->lock);
1350         if (kvm->created_vcpus) {
1351                 mutex_unlock(&kvm->lock);
1352                 return -EBUSY;
1353         }
1354
1355         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1356                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1357                 mutex_unlock(&kvm->lock);
1358                 return -EFAULT;
1359         }
1360         mutex_unlock(&kvm->lock);
1361
1362         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1367         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1370         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1373         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1376         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1379         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1381                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1382         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1384                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1385         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1387                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1388         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1391         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1392                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1393                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1394         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1395                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1396                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1397         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1398                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1399                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1400         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1401                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1402                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1403         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1404                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1405                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1406         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1407                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1408                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1409         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1410                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1411                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1412                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1413                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1414         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1415                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1416                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1417                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1418                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1419
1420         return 0;
1421 }
1422
1423 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1424 {
1425         int ret = -ENXIO;
1426
1427         switch (attr->attr) {
1428         case KVM_S390_VM_CPU_PROCESSOR:
1429                 ret = kvm_s390_set_processor(kvm, attr);
1430                 break;
1431         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1432                 ret = kvm_s390_set_processor_feat(kvm, attr);
1433                 break;
1434         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1435                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1436                 break;
1437         }
1438         return ret;
1439 }
1440
1441 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1442 {
1443         struct kvm_s390_vm_cpu_processor *proc;
1444         int ret = 0;
1445
1446         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1447         if (!proc) {
1448                 ret = -ENOMEM;
1449                 goto out;
1450         }
1451         proc->cpuid = kvm->arch.model.cpuid;
1452         proc->ibc = kvm->arch.model.ibc;
1453         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1454                S390_ARCH_FAC_LIST_SIZE_BYTE);
1455         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1456                  kvm->arch.model.ibc,
1457                  kvm->arch.model.cpuid);
1458         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1459                  kvm->arch.model.fac_list[0],
1460                  kvm->arch.model.fac_list[1],
1461                  kvm->arch.model.fac_list[2]);
1462         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1463                 ret = -EFAULT;
1464         kfree(proc);
1465 out:
1466         return ret;
1467 }
1468
1469 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1470 {
1471         struct kvm_s390_vm_cpu_machine *mach;
1472         int ret = 0;
1473
1474         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1475         if (!mach) {
1476                 ret = -ENOMEM;
1477                 goto out;
1478         }
1479         get_cpu_id((struct cpuid *) &mach->cpuid);
1480         mach->ibc = sclp.ibc;
1481         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1482                S390_ARCH_FAC_LIST_SIZE_BYTE);
1483         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1484                sizeof(stfle_fac_list));
1485         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1486                  kvm->arch.model.ibc,
1487                  kvm->arch.model.cpuid);
1488         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1489                  mach->fac_mask[0],
1490                  mach->fac_mask[1],
1491                  mach->fac_mask[2]);
1492         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1493                  mach->fac_list[0],
1494                  mach->fac_list[1],
1495                  mach->fac_list[2]);
1496         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1497                 ret = -EFAULT;
1498         kfree(mach);
1499 out:
1500         return ret;
1501 }
1502
1503 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1504                                        struct kvm_device_attr *attr)
1505 {
1506         struct kvm_s390_vm_cpu_feat data;
1507
1508         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1509                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1510         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1511                 return -EFAULT;
1512         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1513                          data.feat[0],
1514                          data.feat[1],
1515                          data.feat[2]);
1516         return 0;
1517 }
1518
1519 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1520                                      struct kvm_device_attr *attr)
1521 {
1522         struct kvm_s390_vm_cpu_feat data;
1523
1524         bitmap_copy((unsigned long *) data.feat,
1525                     kvm_s390_available_cpu_feat,
1526                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1527         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1528                 return -EFAULT;
1529         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1530                          data.feat[0],
1531                          data.feat[1],
1532                          data.feat[2]);
1533         return 0;
1534 }
1535
1536 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1537                                           struct kvm_device_attr *attr)
1538 {
1539         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1540             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1541                 return -EFAULT;
1542
1543         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1548         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1551         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1554         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1557         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1560         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1563         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1566         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1568                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1569         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1572         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1573                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1574                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1575         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1576                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1577                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1578         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1579                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1580                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1581         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1582                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1583                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1584         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1585                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1586                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1587         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1588                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1589                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1590         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1591                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1592                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1593                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1594                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1595         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1596                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1597                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1598                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1599                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1600
1601         return 0;
1602 }
1603
1604 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1605                                         struct kvm_device_attr *attr)
1606 {
1607         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1608             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1609                 return -EFAULT;
1610
1611         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1612                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1615                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1616         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1617                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1618                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1619         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1620                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1621                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1622         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1623                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1625         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1628         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1630                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1631         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1632                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1633                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1634         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1635                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1636                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1637         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1638                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1639                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1640         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1641                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1642                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1643         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1644                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1645                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1646         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1647                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1648                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1649         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1650                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1651                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1652         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1653                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1654                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1655         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1656                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1657                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1658         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1659                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1660                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1661                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1662                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1663         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1664                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1665                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1666                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1667                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1668
1669         return 0;
1670 }
1671
1672 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1673 {
1674         int ret = -ENXIO;
1675
1676         switch (attr->attr) {
1677         case KVM_S390_VM_CPU_PROCESSOR:
1678                 ret = kvm_s390_get_processor(kvm, attr);
1679                 break;
1680         case KVM_S390_VM_CPU_MACHINE:
1681                 ret = kvm_s390_get_machine(kvm, attr);
1682                 break;
1683         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1684                 ret = kvm_s390_get_processor_feat(kvm, attr);
1685                 break;
1686         case KVM_S390_VM_CPU_MACHINE_FEAT:
1687                 ret = kvm_s390_get_machine_feat(kvm, attr);
1688                 break;
1689         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1690                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1691                 break;
1692         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1693                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1694                 break;
1695         }
1696         return ret;
1697 }
1698
1699 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1700 {
1701         int ret;
1702
1703         switch (attr->group) {
1704         case KVM_S390_VM_MEM_CTRL:
1705                 ret = kvm_s390_set_mem_control(kvm, attr);
1706                 break;
1707         case KVM_S390_VM_TOD:
1708                 ret = kvm_s390_set_tod(kvm, attr);
1709                 break;
1710         case KVM_S390_VM_CPU_MODEL:
1711                 ret = kvm_s390_set_cpu_model(kvm, attr);
1712                 break;
1713         case KVM_S390_VM_CRYPTO:
1714                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1715                 break;
1716         case KVM_S390_VM_MIGRATION:
1717                 ret = kvm_s390_vm_set_migration(kvm, attr);
1718                 break;
1719         default:
1720                 ret = -ENXIO;
1721                 break;
1722         }
1723
1724         return ret;
1725 }
1726
1727 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1728 {
1729         int ret;
1730
1731         switch (attr->group) {
1732         case KVM_S390_VM_MEM_CTRL:
1733                 ret = kvm_s390_get_mem_control(kvm, attr);
1734                 break;
1735         case KVM_S390_VM_TOD:
1736                 ret = kvm_s390_get_tod(kvm, attr);
1737                 break;
1738         case KVM_S390_VM_CPU_MODEL:
1739                 ret = kvm_s390_get_cpu_model(kvm, attr);
1740                 break;
1741         case KVM_S390_VM_MIGRATION:
1742                 ret = kvm_s390_vm_get_migration(kvm, attr);
1743                 break;
1744         default:
1745                 ret = -ENXIO;
1746                 break;
1747         }
1748
1749         return ret;
1750 }
1751
1752 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1753 {
1754         int ret;
1755
1756         switch (attr->group) {
1757         case KVM_S390_VM_MEM_CTRL:
1758                 switch (attr->attr) {
1759                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1760                 case KVM_S390_VM_MEM_CLR_CMMA:
1761                         ret = sclp.has_cmma ? 0 : -ENXIO;
1762                         break;
1763                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1764                         ret = 0;
1765                         break;
1766                 default:
1767                         ret = -ENXIO;
1768                         break;
1769                 }
1770                 break;
1771         case KVM_S390_VM_TOD:
1772                 switch (attr->attr) {
1773                 case KVM_S390_VM_TOD_LOW:
1774                 case KVM_S390_VM_TOD_HIGH:
1775                         ret = 0;
1776                         break;
1777                 default:
1778                         ret = -ENXIO;
1779                         break;
1780                 }
1781                 break;
1782         case KVM_S390_VM_CPU_MODEL:
1783                 switch (attr->attr) {
1784                 case KVM_S390_VM_CPU_PROCESSOR:
1785                 case KVM_S390_VM_CPU_MACHINE:
1786                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1787                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1788                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1789                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1790                         ret = 0;
1791                         break;
1792                 default:
1793                         ret = -ENXIO;
1794                         break;
1795                 }
1796                 break;
1797         case KVM_S390_VM_CRYPTO:
1798                 switch (attr->attr) {
1799                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1800                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1801                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1802                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1803                         ret = 0;
1804                         break;
1805                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1806                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1807                         ret = ap_instructions_available() ? 0 : -ENXIO;
1808                         break;
1809                 default:
1810                         ret = -ENXIO;
1811                         break;
1812                 }
1813                 break;
1814         case KVM_S390_VM_MIGRATION:
1815                 ret = 0;
1816                 break;
1817         default:
1818                 ret = -ENXIO;
1819                 break;
1820         }
1821
1822         return ret;
1823 }
1824
1825 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1826 {
1827         uint8_t *keys;
1828         uint64_t hva;
1829         int srcu_idx, i, r = 0;
1830
1831         if (args->flags != 0)
1832                 return -EINVAL;
1833
1834         /* Is this guest using storage keys? */
1835         if (!mm_uses_skeys(current->mm))
1836                 return KVM_S390_GET_SKEYS_NONE;
1837
1838         /* Enforce sane limit on memory allocation */
1839         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1840                 return -EINVAL;
1841
1842         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1843         if (!keys)
1844                 return -ENOMEM;
1845
1846         mmap_read_lock(current->mm);
1847         srcu_idx = srcu_read_lock(&kvm->srcu);
1848         for (i = 0; i < args->count; i++) {
1849                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1850                 if (kvm_is_error_hva(hva)) {
1851                         r = -EFAULT;
1852                         break;
1853                 }
1854
1855                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1856                 if (r)
1857                         break;
1858         }
1859         srcu_read_unlock(&kvm->srcu, srcu_idx);
1860         mmap_read_unlock(current->mm);
1861
1862         if (!r) {
1863                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1864                                  sizeof(uint8_t) * args->count);
1865                 if (r)
1866                         r = -EFAULT;
1867         }
1868
1869         kvfree(keys);
1870         return r;
1871 }
1872
1873 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1874 {
1875         uint8_t *keys;
1876         uint64_t hva;
1877         int srcu_idx, i, r = 0;
1878         bool unlocked;
1879
1880         if (args->flags != 0)
1881                 return -EINVAL;
1882
1883         /* Enforce sane limit on memory allocation */
1884         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1885                 return -EINVAL;
1886
1887         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1888         if (!keys)
1889                 return -ENOMEM;
1890
1891         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1892                            sizeof(uint8_t) * args->count);
1893         if (r) {
1894                 r = -EFAULT;
1895                 goto out;
1896         }
1897
1898         /* Enable storage key handling for the guest */
1899         r = s390_enable_skey();
1900         if (r)
1901                 goto out;
1902
1903         i = 0;
1904         mmap_read_lock(current->mm);
1905         srcu_idx = srcu_read_lock(&kvm->srcu);
1906         while (i < args->count) {
1907                 unlocked = false;
1908                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1909                 if (kvm_is_error_hva(hva)) {
1910                         r = -EFAULT;
1911                         break;
1912                 }
1913
1914                 /* Lowest order bit is reserved */
1915                 if (keys[i] & 0x01) {
1916                         r = -EINVAL;
1917                         break;
1918                 }
1919
1920                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1921                 if (r) {
1922                         r = fixup_user_fault(current->mm, hva,
1923                                              FAULT_FLAG_WRITE, &unlocked);
1924                         if (r)
1925                                 break;
1926                 }
1927                 if (!r)
1928                         i++;
1929         }
1930         srcu_read_unlock(&kvm->srcu, srcu_idx);
1931         mmap_read_unlock(current->mm);
1932 out:
1933         kvfree(keys);
1934         return r;
1935 }
1936
1937 /*
1938  * Base address and length must be sent at the start of each block, therefore
1939  * it's cheaper to send some clean data, as long as it's less than the size of
1940  * two longs.
1941  */
1942 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1943 /* for consistency */
1944 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1945
1946 /*
1947  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1948  * address falls in a hole. In that case the index of one of the memslots
1949  * bordering the hole is returned.
1950  */
1951 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1952 {
1953         int start = 0, end = slots->used_slots;
1954         int slot = atomic_read(&slots->last_used_slot);
1955         struct kvm_memory_slot *memslots = slots->memslots;
1956
1957         if (gfn >= memslots[slot].base_gfn &&
1958             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1959                 return slot;
1960
1961         while (start < end) {
1962                 slot = start + (end - start) / 2;
1963
1964                 if (gfn >= memslots[slot].base_gfn)
1965                         end = slot;
1966                 else
1967                         start = slot + 1;
1968         }
1969
1970         if (start >= slots->used_slots)
1971                 return slots->used_slots - 1;
1972
1973         if (gfn >= memslots[start].base_gfn &&
1974             gfn < memslots[start].base_gfn + memslots[start].npages) {
1975                 atomic_set(&slots->last_used_slot, start);
1976         }
1977
1978         return start;
1979 }
1980
1981 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1982                               u8 *res, unsigned long bufsize)
1983 {
1984         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1985
1986         args->count = 0;
1987         while (args->count < bufsize) {
1988                 hva = gfn_to_hva(kvm, cur_gfn);
1989                 /*
1990                  * We return an error if the first value was invalid, but we
1991                  * return successfully if at least one value was copied.
1992                  */
1993                 if (kvm_is_error_hva(hva))
1994                         return args->count ? 0 : -EFAULT;
1995                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1996                         pgstev = 0;
1997                 res[args->count++] = (pgstev >> 24) & 0x43;
1998                 cur_gfn++;
1999         }
2000
2001         return 0;
2002 }
2003
2004 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2005                                               unsigned long cur_gfn)
2006 {
2007         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2008         struct kvm_memory_slot *ms = slots->memslots + slotidx;
2009         unsigned long ofs = cur_gfn - ms->base_gfn;
2010
2011         if (ms->base_gfn + ms->npages <= cur_gfn) {
2012                 slotidx--;
2013                 /* If we are above the highest slot, wrap around */
2014                 if (slotidx < 0)
2015                         slotidx = slots->used_slots - 1;
2016
2017                 ms = slots->memslots + slotidx;
2018                 ofs = 0;
2019         }
2020         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2021         while ((slotidx > 0) && (ofs >= ms->npages)) {
2022                 slotidx--;
2023                 ms = slots->memslots + slotidx;
2024                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2025         }
2026         return ms->base_gfn + ofs;
2027 }
2028
2029 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2030                              u8 *res, unsigned long bufsize)
2031 {
2032         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2033         struct kvm_memslots *slots = kvm_memslots(kvm);
2034         struct kvm_memory_slot *ms;
2035
2036         if (unlikely(!slots->used_slots))
2037                 return 0;
2038
2039         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2040         ms = gfn_to_memslot(kvm, cur_gfn);
2041         args->count = 0;
2042         args->start_gfn = cur_gfn;
2043         if (!ms)
2044                 return 0;
2045         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2046         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2047
2048         while (args->count < bufsize) {
2049                 hva = gfn_to_hva(kvm, cur_gfn);
2050                 if (kvm_is_error_hva(hva))
2051                         return 0;
2052                 /* Decrement only if we actually flipped the bit to 0 */
2053                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2054                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2055                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2056                         pgstev = 0;
2057                 /* Save the value */
2058                 res[args->count++] = (pgstev >> 24) & 0x43;
2059                 /* If the next bit is too far away, stop. */
2060                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2061                         return 0;
2062                 /* If we reached the previous "next", find the next one */
2063                 if (cur_gfn == next_gfn)
2064                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2065                 /* Reached the end of memory or of the buffer, stop */
2066                 if ((next_gfn >= mem_end) ||
2067                     (next_gfn - args->start_gfn >= bufsize))
2068                         return 0;
2069                 cur_gfn++;
2070                 /* Reached the end of the current memslot, take the next one. */
2071                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2072                         ms = gfn_to_memslot(kvm, cur_gfn);
2073                         if (!ms)
2074                                 return 0;
2075                 }
2076         }
2077         return 0;
2078 }
2079
2080 /*
2081  * This function searches for the next page with dirty CMMA attributes, and
2082  * saves the attributes in the buffer up to either the end of the buffer or
2083  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2084  * no trailing clean bytes are saved.
2085  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2086  * output buffer will indicate 0 as length.
2087  */
2088 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2089                                   struct kvm_s390_cmma_log *args)
2090 {
2091         unsigned long bufsize;
2092         int srcu_idx, peek, ret;
2093         u8 *values;
2094
2095         if (!kvm->arch.use_cmma)
2096                 return -ENXIO;
2097         /* Invalid/unsupported flags were specified */
2098         if (args->flags & ~KVM_S390_CMMA_PEEK)
2099                 return -EINVAL;
2100         /* Migration mode query, and we are not doing a migration */
2101         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2102         if (!peek && !kvm->arch.migration_mode)
2103                 return -EINVAL;
2104         /* CMMA is disabled or was not used, or the buffer has length zero */
2105         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2106         if (!bufsize || !kvm->mm->context.uses_cmm) {
2107                 memset(args, 0, sizeof(*args));
2108                 return 0;
2109         }
2110         /* We are not peeking, and there are no dirty pages */
2111         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2112                 memset(args, 0, sizeof(*args));
2113                 return 0;
2114         }
2115
2116         values = vmalloc(bufsize);
2117         if (!values)
2118                 return -ENOMEM;
2119
2120         mmap_read_lock(kvm->mm);
2121         srcu_idx = srcu_read_lock(&kvm->srcu);
2122         if (peek)
2123                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2124         else
2125                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2126         srcu_read_unlock(&kvm->srcu, srcu_idx);
2127         mmap_read_unlock(kvm->mm);
2128
2129         if (kvm->arch.migration_mode)
2130                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2131         else
2132                 args->remaining = 0;
2133
2134         if (copy_to_user((void __user *)args->values, values, args->count))
2135                 ret = -EFAULT;
2136
2137         vfree(values);
2138         return ret;
2139 }
2140
2141 /*
2142  * This function sets the CMMA attributes for the given pages. If the input
2143  * buffer has zero length, no action is taken, otherwise the attributes are
2144  * set and the mm->context.uses_cmm flag is set.
2145  */
2146 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2147                                   const struct kvm_s390_cmma_log *args)
2148 {
2149         unsigned long hva, mask, pgstev, i;
2150         uint8_t *bits;
2151         int srcu_idx, r = 0;
2152
2153         mask = args->mask;
2154
2155         if (!kvm->arch.use_cmma)
2156                 return -ENXIO;
2157         /* invalid/unsupported flags */
2158         if (args->flags != 0)
2159                 return -EINVAL;
2160         /* Enforce sane limit on memory allocation */
2161         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2162                 return -EINVAL;
2163         /* Nothing to do */
2164         if (args->count == 0)
2165                 return 0;
2166
2167         bits = vmalloc(array_size(sizeof(*bits), args->count));
2168         if (!bits)
2169                 return -ENOMEM;
2170
2171         r = copy_from_user(bits, (void __user *)args->values, args->count);
2172         if (r) {
2173                 r = -EFAULT;
2174                 goto out;
2175         }
2176
2177         mmap_read_lock(kvm->mm);
2178         srcu_idx = srcu_read_lock(&kvm->srcu);
2179         for (i = 0; i < args->count; i++) {
2180                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2181                 if (kvm_is_error_hva(hva)) {
2182                         r = -EFAULT;
2183                         break;
2184                 }
2185
2186                 pgstev = bits[i];
2187                 pgstev = pgstev << 24;
2188                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2189                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2190         }
2191         srcu_read_unlock(&kvm->srcu, srcu_idx);
2192         mmap_read_unlock(kvm->mm);
2193
2194         if (!kvm->mm->context.uses_cmm) {
2195                 mmap_write_lock(kvm->mm);
2196                 kvm->mm->context.uses_cmm = 1;
2197                 mmap_write_unlock(kvm->mm);
2198         }
2199 out:
2200         vfree(bits);
2201         return r;
2202 }
2203
2204 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2205 {
2206         struct kvm_vcpu *vcpu;
2207         u16 rc, rrc;
2208         int ret = 0;
2209         int i;
2210
2211         /*
2212          * We ignore failures and try to destroy as many CPUs as possible.
2213          * At the same time we must not free the assigned resources when
2214          * this fails, as the ultravisor has still access to that memory.
2215          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2216          * behind.
2217          * We want to return the first failure rc and rrc, though.
2218          */
2219         kvm_for_each_vcpu(i, vcpu, kvm) {
2220                 mutex_lock(&vcpu->mutex);
2221                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2222                         *rcp = rc;
2223                         *rrcp = rrc;
2224                         ret = -EIO;
2225                 }
2226                 mutex_unlock(&vcpu->mutex);
2227         }
2228         return ret;
2229 }
2230
2231 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2232 {
2233         int i, r = 0;
2234         u16 dummy;
2235
2236         struct kvm_vcpu *vcpu;
2237
2238         kvm_for_each_vcpu(i, vcpu, kvm) {
2239                 mutex_lock(&vcpu->mutex);
2240                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2241                 mutex_unlock(&vcpu->mutex);
2242                 if (r)
2243                         break;
2244         }
2245         if (r)
2246                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2247         return r;
2248 }
2249
2250 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2251 {
2252         int r = 0;
2253         u16 dummy;
2254         void __user *argp = (void __user *)cmd->data;
2255
2256         switch (cmd->cmd) {
2257         case KVM_PV_ENABLE: {
2258                 r = -EINVAL;
2259                 if (kvm_s390_pv_is_protected(kvm))
2260                         break;
2261
2262                 /*
2263                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2264                  *  esca, we need no cleanup in the error cases below
2265                  */
2266                 r = sca_switch_to_extended(kvm);
2267                 if (r)
2268                         break;
2269
2270                 mmap_write_lock(current->mm);
2271                 r = gmap_mark_unmergeable();
2272                 mmap_write_unlock(current->mm);
2273                 if (r)
2274                         break;
2275
2276                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2277                 if (r)
2278                         break;
2279
2280                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2281                 if (r)
2282                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2283
2284                 /* we need to block service interrupts from now on */
2285                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2286                 break;
2287         }
2288         case KVM_PV_DISABLE: {
2289                 r = -EINVAL;
2290                 if (!kvm_s390_pv_is_protected(kvm))
2291                         break;
2292
2293                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2294                 /*
2295                  * If a CPU could not be destroyed, destroy VM will also fail.
2296                  * There is no point in trying to destroy it. Instead return
2297                  * the rc and rrc from the first CPU that failed destroying.
2298                  */
2299                 if (r)
2300                         break;
2301                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2302
2303                 /* no need to block service interrupts any more */
2304                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2305                 break;
2306         }
2307         case KVM_PV_SET_SEC_PARMS: {
2308                 struct kvm_s390_pv_sec_parm parms = {};
2309                 void *hdr;
2310
2311                 r = -EINVAL;
2312                 if (!kvm_s390_pv_is_protected(kvm))
2313                         break;
2314
2315                 r = -EFAULT;
2316                 if (copy_from_user(&parms, argp, sizeof(parms)))
2317                         break;
2318
2319                 /* Currently restricted to 8KB */
2320                 r = -EINVAL;
2321                 if (parms.length > PAGE_SIZE * 2)
2322                         break;
2323
2324                 r = -ENOMEM;
2325                 hdr = vmalloc(parms.length);
2326                 if (!hdr)
2327                         break;
2328
2329                 r = -EFAULT;
2330                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2331                                     parms.length))
2332                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2333                                                       &cmd->rc, &cmd->rrc);
2334
2335                 vfree(hdr);
2336                 break;
2337         }
2338         case KVM_PV_UNPACK: {
2339                 struct kvm_s390_pv_unp unp = {};
2340
2341                 r = -EINVAL;
2342                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2343                         break;
2344
2345                 r = -EFAULT;
2346                 if (copy_from_user(&unp, argp, sizeof(unp)))
2347                         break;
2348
2349                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2350                                        &cmd->rc, &cmd->rrc);
2351                 break;
2352         }
2353         case KVM_PV_VERIFY: {
2354                 r = -EINVAL;
2355                 if (!kvm_s390_pv_is_protected(kvm))
2356                         break;
2357
2358                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2359                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2360                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2361                              cmd->rrc);
2362                 break;
2363         }
2364         case KVM_PV_PREP_RESET: {
2365                 r = -EINVAL;
2366                 if (!kvm_s390_pv_is_protected(kvm))
2367                         break;
2368
2369                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2370                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2371                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2372                              cmd->rc, cmd->rrc);
2373                 break;
2374         }
2375         case KVM_PV_UNSHARE_ALL: {
2376                 r = -EINVAL;
2377                 if (!kvm_s390_pv_is_protected(kvm))
2378                         break;
2379
2380                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2381                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2382                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2383                              cmd->rc, cmd->rrc);
2384                 break;
2385         }
2386         default:
2387                 r = -ENOTTY;
2388         }
2389         return r;
2390 }
2391
2392 long kvm_arch_vm_ioctl(struct file *filp,
2393                        unsigned int ioctl, unsigned long arg)
2394 {
2395         struct kvm *kvm = filp->private_data;
2396         void __user *argp = (void __user *)arg;
2397         struct kvm_device_attr attr;
2398         int r;
2399
2400         switch (ioctl) {
2401         case KVM_S390_INTERRUPT: {
2402                 struct kvm_s390_interrupt s390int;
2403
2404                 r = -EFAULT;
2405                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2406                         break;
2407                 r = kvm_s390_inject_vm(kvm, &s390int);
2408                 break;
2409         }
2410         case KVM_CREATE_IRQCHIP: {
2411                 struct kvm_irq_routing_entry routing;
2412
2413                 r = -EINVAL;
2414                 if (kvm->arch.use_irqchip) {
2415                         /* Set up dummy routing. */
2416                         memset(&routing, 0, sizeof(routing));
2417                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2418                 }
2419                 break;
2420         }
2421         case KVM_SET_DEVICE_ATTR: {
2422                 r = -EFAULT;
2423                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2424                         break;
2425                 r = kvm_s390_vm_set_attr(kvm, &attr);
2426                 break;
2427         }
2428         case KVM_GET_DEVICE_ATTR: {
2429                 r = -EFAULT;
2430                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2431                         break;
2432                 r = kvm_s390_vm_get_attr(kvm, &attr);
2433                 break;
2434         }
2435         case KVM_HAS_DEVICE_ATTR: {
2436                 r = -EFAULT;
2437                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2438                         break;
2439                 r = kvm_s390_vm_has_attr(kvm, &attr);
2440                 break;
2441         }
2442         case KVM_S390_GET_SKEYS: {
2443                 struct kvm_s390_skeys args;
2444
2445                 r = -EFAULT;
2446                 if (copy_from_user(&args, argp,
2447                                    sizeof(struct kvm_s390_skeys)))
2448                         break;
2449                 r = kvm_s390_get_skeys(kvm, &args);
2450                 break;
2451         }
2452         case KVM_S390_SET_SKEYS: {
2453                 struct kvm_s390_skeys args;
2454
2455                 r = -EFAULT;
2456                 if (copy_from_user(&args, argp,
2457                                    sizeof(struct kvm_s390_skeys)))
2458                         break;
2459                 r = kvm_s390_set_skeys(kvm, &args);
2460                 break;
2461         }
2462         case KVM_S390_GET_CMMA_BITS: {
2463                 struct kvm_s390_cmma_log args;
2464
2465                 r = -EFAULT;
2466                 if (copy_from_user(&args, argp, sizeof(args)))
2467                         break;
2468                 mutex_lock(&kvm->slots_lock);
2469                 r = kvm_s390_get_cmma_bits(kvm, &args);
2470                 mutex_unlock(&kvm->slots_lock);
2471                 if (!r) {
2472                         r = copy_to_user(argp, &args, sizeof(args));
2473                         if (r)
2474                                 r = -EFAULT;
2475                 }
2476                 break;
2477         }
2478         case KVM_S390_SET_CMMA_BITS: {
2479                 struct kvm_s390_cmma_log args;
2480
2481                 r = -EFAULT;
2482                 if (copy_from_user(&args, argp, sizeof(args)))
2483                         break;
2484                 mutex_lock(&kvm->slots_lock);
2485                 r = kvm_s390_set_cmma_bits(kvm, &args);
2486                 mutex_unlock(&kvm->slots_lock);
2487                 break;
2488         }
2489         case KVM_S390_PV_COMMAND: {
2490                 struct kvm_pv_cmd args;
2491
2492                 /* protvirt means user cpu state */
2493                 kvm_s390_set_user_cpu_state_ctrl(kvm);
2494                 r = 0;
2495                 if (!is_prot_virt_host()) {
2496                         r = -EINVAL;
2497                         break;
2498                 }
2499                 if (copy_from_user(&args, argp, sizeof(args))) {
2500                         r = -EFAULT;
2501                         break;
2502                 }
2503                 if (args.flags) {
2504                         r = -EINVAL;
2505                         break;
2506                 }
2507                 mutex_lock(&kvm->lock);
2508                 r = kvm_s390_handle_pv(kvm, &args);
2509                 mutex_unlock(&kvm->lock);
2510                 if (copy_to_user(argp, &args, sizeof(args))) {
2511                         r = -EFAULT;
2512                         break;
2513                 }
2514                 break;
2515         }
2516         default:
2517                 r = -ENOTTY;
2518         }
2519
2520         return r;
2521 }
2522
2523 static int kvm_s390_apxa_installed(void)
2524 {
2525         struct ap_config_info info;
2526
2527         if (ap_instructions_available()) {
2528                 if (ap_qci(&info) == 0)
2529                         return info.apxa;
2530         }
2531
2532         return 0;
2533 }
2534
2535 /*
2536  * The format of the crypto control block (CRYCB) is specified in the 3 low
2537  * order bits of the CRYCB designation (CRYCBD) field as follows:
2538  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2539  *           AP extended addressing (APXA) facility are installed.
2540  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2541  * Format 2: Both the APXA and MSAX3 facilities are installed
2542  */
2543 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2544 {
2545         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2546
2547         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2548         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2549
2550         /* Check whether MSAX3 is installed */
2551         if (!test_kvm_facility(kvm, 76))
2552                 return;
2553
2554         if (kvm_s390_apxa_installed())
2555                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2556         else
2557                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2558 }
2559
2560 /*
2561  * kvm_arch_crypto_set_masks
2562  *
2563  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2564  *       to be set.
2565  * @apm: the mask identifying the accessible AP adapters
2566  * @aqm: the mask identifying the accessible AP domains
2567  * @adm: the mask identifying the accessible AP control domains
2568  *
2569  * Set the masks that identify the adapters, domains and control domains to
2570  * which the KVM guest is granted access.
2571  *
2572  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2573  *       function.
2574  */
2575 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2576                                unsigned long *aqm, unsigned long *adm)
2577 {
2578         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2579
2580         kvm_s390_vcpu_block_all(kvm);
2581
2582         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2583         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2584                 memcpy(crycb->apcb1.apm, apm, 32);
2585                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2586                          apm[0], apm[1], apm[2], apm[3]);
2587                 memcpy(crycb->apcb1.aqm, aqm, 32);
2588                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2589                          aqm[0], aqm[1], aqm[2], aqm[3]);
2590                 memcpy(crycb->apcb1.adm, adm, 32);
2591                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2592                          adm[0], adm[1], adm[2], adm[3]);
2593                 break;
2594         case CRYCB_FORMAT1:
2595         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2596                 memcpy(crycb->apcb0.apm, apm, 8);
2597                 memcpy(crycb->apcb0.aqm, aqm, 2);
2598                 memcpy(crycb->apcb0.adm, adm, 2);
2599                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2600                          apm[0], *((unsigned short *)aqm),
2601                          *((unsigned short *)adm));
2602                 break;
2603         default:        /* Can not happen */
2604                 break;
2605         }
2606
2607         /* recreate the shadow crycb for each vcpu */
2608         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2609         kvm_s390_vcpu_unblock_all(kvm);
2610 }
2611 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2612
2613 /*
2614  * kvm_arch_crypto_clear_masks
2615  *
2616  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2617  *       to be cleared.
2618  *
2619  * Clear the masks that identify the adapters, domains and control domains to
2620  * which the KVM guest is granted access.
2621  *
2622  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2623  *       function.
2624  */
2625 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2626 {
2627         kvm_s390_vcpu_block_all(kvm);
2628
2629         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2630                sizeof(kvm->arch.crypto.crycb->apcb0));
2631         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2632                sizeof(kvm->arch.crypto.crycb->apcb1));
2633
2634         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2635         /* recreate the shadow crycb for each vcpu */
2636         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2637         kvm_s390_vcpu_unblock_all(kvm);
2638 }
2639 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2640
2641 static u64 kvm_s390_get_initial_cpuid(void)
2642 {
2643         struct cpuid cpuid;
2644
2645         get_cpu_id(&cpuid);
2646         cpuid.version = 0xff;
2647         return *((u64 *) &cpuid);
2648 }
2649
2650 static void kvm_s390_crypto_init(struct kvm *kvm)
2651 {
2652         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2653         kvm_s390_set_crycb_format(kvm);
2654         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2655
2656         if (!test_kvm_facility(kvm, 76))
2657                 return;
2658
2659         /* Enable AES/DEA protected key functions by default */
2660         kvm->arch.crypto.aes_kw = 1;
2661         kvm->arch.crypto.dea_kw = 1;
2662         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2663                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2664         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2665                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2666 }
2667
2668 static void sca_dispose(struct kvm *kvm)
2669 {
2670         if (kvm->arch.use_esca)
2671                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2672         else
2673                 free_page((unsigned long)(kvm->arch.sca));
2674         kvm->arch.sca = NULL;
2675 }
2676
2677 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2678 {
2679         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2680         int i, rc;
2681         char debug_name[16];
2682         static unsigned long sca_offset;
2683
2684         rc = -EINVAL;
2685 #ifdef CONFIG_KVM_S390_UCONTROL
2686         if (type & ~KVM_VM_S390_UCONTROL)
2687                 goto out_err;
2688         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2689                 goto out_err;
2690 #else
2691         if (type)
2692                 goto out_err;
2693 #endif
2694
2695         rc = s390_enable_sie();
2696         if (rc)
2697                 goto out_err;
2698
2699         rc = -ENOMEM;
2700
2701         if (!sclp.has_64bscao)
2702                 alloc_flags |= GFP_DMA;
2703         rwlock_init(&kvm->arch.sca_lock);
2704         /* start with basic SCA */
2705         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2706         if (!kvm->arch.sca)
2707                 goto out_err;
2708         mutex_lock(&kvm_lock);
2709         sca_offset += 16;
2710         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2711                 sca_offset = 0;
2712         kvm->arch.sca = (struct bsca_block *)
2713                         ((char *) kvm->arch.sca + sca_offset);
2714         mutex_unlock(&kvm_lock);
2715
2716         sprintf(debug_name, "kvm-%u", current->pid);
2717
2718         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2719         if (!kvm->arch.dbf)
2720                 goto out_err;
2721
2722         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2723         kvm->arch.sie_page2 =
2724              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2725         if (!kvm->arch.sie_page2)
2726                 goto out_err;
2727
2728         kvm->arch.sie_page2->kvm = kvm;
2729         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2730
2731         for (i = 0; i < kvm_s390_fac_size(); i++) {
2732                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2733                                               (kvm_s390_fac_base[i] |
2734                                                kvm_s390_fac_ext[i]);
2735                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2736                                               kvm_s390_fac_base[i];
2737         }
2738         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2739
2740         /* we are always in czam mode - even on pre z14 machines */
2741         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2742         set_kvm_facility(kvm->arch.model.fac_list, 138);
2743         /* we emulate STHYI in kvm */
2744         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2745         set_kvm_facility(kvm->arch.model.fac_list, 74);
2746         if (MACHINE_HAS_TLB_GUEST) {
2747                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2748                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2749         }
2750
2751         if (css_general_characteristics.aiv && test_facility(65))
2752                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2753
2754         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2755         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2756
2757         kvm_s390_crypto_init(kvm);
2758
2759         mutex_init(&kvm->arch.float_int.ais_lock);
2760         spin_lock_init(&kvm->arch.float_int.lock);
2761         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2762                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2763         init_waitqueue_head(&kvm->arch.ipte_wq);
2764         mutex_init(&kvm->arch.ipte_mutex);
2765
2766         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2767         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2768
2769         if (type & KVM_VM_S390_UCONTROL) {
2770                 kvm->arch.gmap = NULL;
2771                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2772         } else {
2773                 if (sclp.hamax == U64_MAX)
2774                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2775                 else
2776                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2777                                                     sclp.hamax + 1);
2778                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2779                 if (!kvm->arch.gmap)
2780                         goto out_err;
2781                 kvm->arch.gmap->private = kvm;
2782                 kvm->arch.gmap->pfault_enabled = 0;
2783         }
2784
2785         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2786         kvm->arch.use_skf = sclp.has_skey;
2787         spin_lock_init(&kvm->arch.start_stop_lock);
2788         kvm_s390_vsie_init(kvm);
2789         if (use_gisa)
2790                 kvm_s390_gisa_init(kvm);
2791         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2792
2793         return 0;
2794 out_err:
2795         free_page((unsigned long)kvm->arch.sie_page2);
2796         debug_unregister(kvm->arch.dbf);
2797         sca_dispose(kvm);
2798         KVM_EVENT(3, "creation of vm failed: %d", rc);
2799         return rc;
2800 }
2801
2802 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2803 {
2804         u16 rc, rrc;
2805
2806         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2807         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2808         kvm_s390_clear_local_irqs(vcpu);
2809         kvm_clear_async_pf_completion_queue(vcpu);
2810         if (!kvm_is_ucontrol(vcpu->kvm))
2811                 sca_del_vcpu(vcpu);
2812
2813         if (kvm_is_ucontrol(vcpu->kvm))
2814                 gmap_remove(vcpu->arch.gmap);
2815
2816         if (vcpu->kvm->arch.use_cmma)
2817                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2818         /* We can not hold the vcpu mutex here, we are already dying */
2819         if (kvm_s390_pv_cpu_get_handle(vcpu))
2820                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2821         free_page((unsigned long)(vcpu->arch.sie_block));
2822 }
2823
2824 static void kvm_free_vcpus(struct kvm *kvm)
2825 {
2826         unsigned int i;
2827         struct kvm_vcpu *vcpu;
2828
2829         kvm_for_each_vcpu(i, vcpu, kvm)
2830                 kvm_vcpu_destroy(vcpu);
2831
2832         mutex_lock(&kvm->lock);
2833         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2834                 kvm->vcpus[i] = NULL;
2835
2836         atomic_set(&kvm->online_vcpus, 0);
2837         mutex_unlock(&kvm->lock);
2838 }
2839
2840 void kvm_arch_destroy_vm(struct kvm *kvm)
2841 {
2842         u16 rc, rrc;
2843
2844         kvm_free_vcpus(kvm);
2845         sca_dispose(kvm);
2846         kvm_s390_gisa_destroy(kvm);
2847         /*
2848          * We are already at the end of life and kvm->lock is not taken.
2849          * This is ok as the file descriptor is closed by now and nobody
2850          * can mess with the pv state. To avoid lockdep_assert_held from
2851          * complaining we do not use kvm_s390_pv_is_protected.
2852          */
2853         if (kvm_s390_pv_get_handle(kvm))
2854                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2855         debug_unregister(kvm->arch.dbf);
2856         free_page((unsigned long)kvm->arch.sie_page2);
2857         if (!kvm_is_ucontrol(kvm))
2858                 gmap_remove(kvm->arch.gmap);
2859         kvm_s390_destroy_adapters(kvm);
2860         kvm_s390_clear_float_irqs(kvm);
2861         kvm_s390_vsie_destroy(kvm);
2862         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2863 }
2864
2865 /* Section: vcpu related */
2866 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2867 {
2868         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2869         if (!vcpu->arch.gmap)
2870                 return -ENOMEM;
2871         vcpu->arch.gmap->private = vcpu->kvm;
2872
2873         return 0;
2874 }
2875
2876 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2877 {
2878         if (!kvm_s390_use_sca_entries())
2879                 return;
2880         read_lock(&vcpu->kvm->arch.sca_lock);
2881         if (vcpu->kvm->arch.use_esca) {
2882                 struct esca_block *sca = vcpu->kvm->arch.sca;
2883
2884                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2885                 sca->cpu[vcpu->vcpu_id].sda = 0;
2886         } else {
2887                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2888
2889                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2890                 sca->cpu[vcpu->vcpu_id].sda = 0;
2891         }
2892         read_unlock(&vcpu->kvm->arch.sca_lock);
2893 }
2894
2895 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2896 {
2897         if (!kvm_s390_use_sca_entries()) {
2898                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2899
2900                 /* we still need the basic sca for the ipte control */
2901                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2902                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2903                 return;
2904         }
2905         read_lock(&vcpu->kvm->arch.sca_lock);
2906         if (vcpu->kvm->arch.use_esca) {
2907                 struct esca_block *sca = vcpu->kvm->arch.sca;
2908
2909                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2910                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2911                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2912                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2913                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2914         } else {
2915                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2916
2917                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2918                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2919                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2920                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2921         }
2922         read_unlock(&vcpu->kvm->arch.sca_lock);
2923 }
2924
2925 /* Basic SCA to Extended SCA data copy routines */
2926 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2927 {
2928         d->sda = s->sda;
2929         d->sigp_ctrl.c = s->sigp_ctrl.c;
2930         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2931 }
2932
2933 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2934 {
2935         int i;
2936
2937         d->ipte_control = s->ipte_control;
2938         d->mcn[0] = s->mcn;
2939         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2940                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2941 }
2942
2943 static int sca_switch_to_extended(struct kvm *kvm)
2944 {
2945         struct bsca_block *old_sca = kvm->arch.sca;
2946         struct esca_block *new_sca;
2947         struct kvm_vcpu *vcpu;
2948         unsigned int vcpu_idx;
2949         u32 scaol, scaoh;
2950
2951         if (kvm->arch.use_esca)
2952                 return 0;
2953
2954         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2955         if (!new_sca)
2956                 return -ENOMEM;
2957
2958         scaoh = (u32)((u64)(new_sca) >> 32);
2959         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2960
2961         kvm_s390_vcpu_block_all(kvm);
2962         write_lock(&kvm->arch.sca_lock);
2963
2964         sca_copy_b_to_e(new_sca, old_sca);
2965
2966         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2967                 vcpu->arch.sie_block->scaoh = scaoh;
2968                 vcpu->arch.sie_block->scaol = scaol;
2969                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2970         }
2971         kvm->arch.sca = new_sca;
2972         kvm->arch.use_esca = 1;
2973
2974         write_unlock(&kvm->arch.sca_lock);
2975         kvm_s390_vcpu_unblock_all(kvm);
2976
2977         free_page((unsigned long)old_sca);
2978
2979         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2980                  old_sca, kvm->arch.sca);
2981         return 0;
2982 }
2983
2984 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2985 {
2986         int rc;
2987
2988         if (!kvm_s390_use_sca_entries()) {
2989                 if (id < KVM_MAX_VCPUS)
2990                         return true;
2991                 return false;
2992         }
2993         if (id < KVM_S390_BSCA_CPU_SLOTS)
2994                 return true;
2995         if (!sclp.has_esca || !sclp.has_64bscao)
2996                 return false;
2997
2998         mutex_lock(&kvm->lock);
2999         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3000         mutex_unlock(&kvm->lock);
3001
3002         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3003 }
3004
3005 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3006 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3007 {
3008         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3009         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3010         vcpu->arch.cputm_start = get_tod_clock_fast();
3011         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3012 }
3013
3014 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3015 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3016 {
3017         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3018         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3019         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3020         vcpu->arch.cputm_start = 0;
3021         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3022 }
3023
3024 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3025 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3026 {
3027         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3028         vcpu->arch.cputm_enabled = true;
3029         __start_cpu_timer_accounting(vcpu);
3030 }
3031
3032 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3033 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3034 {
3035         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3036         __stop_cpu_timer_accounting(vcpu);
3037         vcpu->arch.cputm_enabled = false;
3038 }
3039
3040 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3041 {
3042         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3043         __enable_cpu_timer_accounting(vcpu);
3044         preempt_enable();
3045 }
3046
3047 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3048 {
3049         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3050         __disable_cpu_timer_accounting(vcpu);
3051         preempt_enable();
3052 }
3053
3054 /* set the cpu timer - may only be called from the VCPU thread itself */
3055 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3056 {
3057         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3058         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3059         if (vcpu->arch.cputm_enabled)
3060                 vcpu->arch.cputm_start = get_tod_clock_fast();
3061         vcpu->arch.sie_block->cputm = cputm;
3062         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3063         preempt_enable();
3064 }
3065
3066 /* update and get the cpu timer - can also be called from other VCPU threads */
3067 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3068 {
3069         unsigned int seq;
3070         __u64 value;
3071
3072         if (unlikely(!vcpu->arch.cputm_enabled))
3073                 return vcpu->arch.sie_block->cputm;
3074
3075         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3076         do {
3077                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3078                 /*
3079                  * If the writer would ever execute a read in the critical
3080                  * section, e.g. in irq context, we have a deadlock.
3081                  */
3082                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3083                 value = vcpu->arch.sie_block->cputm;
3084                 /* if cputm_start is 0, accounting is being started/stopped */
3085                 if (likely(vcpu->arch.cputm_start))
3086                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3087         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3088         preempt_enable();
3089         return value;
3090 }
3091
3092 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3093 {
3094
3095         gmap_enable(vcpu->arch.enabled_gmap);
3096         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3097         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3098                 __start_cpu_timer_accounting(vcpu);
3099         vcpu->cpu = cpu;
3100 }
3101
3102 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3103 {
3104         vcpu->cpu = -1;
3105         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3106                 __stop_cpu_timer_accounting(vcpu);
3107         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3108         vcpu->arch.enabled_gmap = gmap_get_enabled();
3109         gmap_disable(vcpu->arch.enabled_gmap);
3110
3111 }
3112
3113 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3114 {
3115         mutex_lock(&vcpu->kvm->lock);
3116         preempt_disable();
3117         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3118         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3119         preempt_enable();
3120         mutex_unlock(&vcpu->kvm->lock);
3121         if (!kvm_is_ucontrol(vcpu->kvm)) {
3122                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3123                 sca_add_vcpu(vcpu);
3124         }
3125         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3126                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3127         /* make vcpu_load load the right gmap on the first trigger */
3128         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3129 }
3130
3131 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3132 {
3133         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3134             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3135                 return true;
3136         return false;
3137 }
3138
3139 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3140 {
3141         /* At least one ECC subfunction must be present */
3142         return kvm_has_pckmo_subfunc(kvm, 32) ||
3143                kvm_has_pckmo_subfunc(kvm, 33) ||
3144                kvm_has_pckmo_subfunc(kvm, 34) ||
3145                kvm_has_pckmo_subfunc(kvm, 40) ||
3146                kvm_has_pckmo_subfunc(kvm, 41);
3147
3148 }
3149
3150 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3151 {
3152         /*
3153          * If the AP instructions are not being interpreted and the MSAX3
3154          * facility is not configured for the guest, there is nothing to set up.
3155          */
3156         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3157                 return;
3158
3159         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3160         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3161         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3162         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3163
3164         if (vcpu->kvm->arch.crypto.apie)
3165                 vcpu->arch.sie_block->eca |= ECA_APIE;
3166
3167         /* Set up protected key support */
3168         if (vcpu->kvm->arch.crypto.aes_kw) {
3169                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3170                 /* ecc is also wrapped with AES key */
3171                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3172                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3173         }
3174
3175         if (vcpu->kvm->arch.crypto.dea_kw)
3176                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3177 }
3178
3179 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3180 {
3181         free_page(vcpu->arch.sie_block->cbrlo);
3182         vcpu->arch.sie_block->cbrlo = 0;
3183 }
3184
3185 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3186 {
3187         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3188         if (!vcpu->arch.sie_block->cbrlo)
3189                 return -ENOMEM;
3190         return 0;
3191 }
3192
3193 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3194 {
3195         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3196
3197         vcpu->arch.sie_block->ibc = model->ibc;
3198         if (test_kvm_facility(vcpu->kvm, 7))
3199                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3200 }
3201
3202 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3203 {
3204         int rc = 0;
3205         u16 uvrc, uvrrc;
3206
3207         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3208                                                     CPUSTAT_SM |
3209                                                     CPUSTAT_STOPPED);
3210
3211         if (test_kvm_facility(vcpu->kvm, 78))
3212                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3213         else if (test_kvm_facility(vcpu->kvm, 8))
3214                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3215
3216         kvm_s390_vcpu_setup_model(vcpu);
3217
3218         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3219         if (MACHINE_HAS_ESOP)
3220                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3221         if (test_kvm_facility(vcpu->kvm, 9))
3222                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3223         if (test_kvm_facility(vcpu->kvm, 73))
3224                 vcpu->arch.sie_block->ecb |= ECB_TE;
3225         if (!kvm_is_ucontrol(vcpu->kvm))
3226                 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3227
3228         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3229                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3230         if (test_kvm_facility(vcpu->kvm, 130))
3231                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3232         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3233         if (sclp.has_cei)
3234                 vcpu->arch.sie_block->eca |= ECA_CEI;
3235         if (sclp.has_ib)
3236                 vcpu->arch.sie_block->eca |= ECA_IB;
3237         if (sclp.has_siif)
3238                 vcpu->arch.sie_block->eca |= ECA_SII;
3239         if (sclp.has_sigpif)
3240                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3241         if (test_kvm_facility(vcpu->kvm, 129)) {
3242                 vcpu->arch.sie_block->eca |= ECA_VX;
3243                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3244         }
3245         if (test_kvm_facility(vcpu->kvm, 139))
3246                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3247         if (test_kvm_facility(vcpu->kvm, 156))
3248                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3249         if (vcpu->arch.sie_block->gd) {
3250                 vcpu->arch.sie_block->eca |= ECA_AIV;
3251                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3252                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3253         }
3254         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3255                                         | SDNXC;
3256         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3257
3258         if (sclp.has_kss)
3259                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3260         else
3261                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3262
3263         if (vcpu->kvm->arch.use_cmma) {
3264                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3265                 if (rc)
3266                         return rc;
3267         }
3268         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3269         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3270
3271         vcpu->arch.sie_block->hpid = HPID_KVM;
3272
3273         kvm_s390_vcpu_crypto_setup(vcpu);
3274
3275         mutex_lock(&vcpu->kvm->lock);
3276         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3277                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3278                 if (rc)
3279                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3280         }
3281         mutex_unlock(&vcpu->kvm->lock);
3282
3283         return rc;
3284 }
3285
3286 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3287 {
3288         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3289                 return -EINVAL;
3290         return 0;
3291 }
3292
3293 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3294 {
3295         struct sie_page *sie_page;
3296         int rc;
3297
3298         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3299         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3300         if (!sie_page)
3301                 return -ENOMEM;
3302
3303         vcpu->arch.sie_block = &sie_page->sie_block;
3304         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3305
3306         /* the real guest size will always be smaller than msl */
3307         vcpu->arch.sie_block->mso = 0;
3308         vcpu->arch.sie_block->msl = sclp.hamax;
3309
3310         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3311         spin_lock_init(&vcpu->arch.local_int.lock);
3312         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3313         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3314                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3315         seqcount_init(&vcpu->arch.cputm_seqcount);
3316
3317         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3318         kvm_clear_async_pf_completion_queue(vcpu);
3319         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3320                                     KVM_SYNC_GPRS |
3321                                     KVM_SYNC_ACRS |
3322                                     KVM_SYNC_CRS |
3323                                     KVM_SYNC_ARCH0 |
3324                                     KVM_SYNC_PFAULT |
3325                                     KVM_SYNC_DIAG318;
3326         kvm_s390_set_prefix(vcpu, 0);
3327         if (test_kvm_facility(vcpu->kvm, 64))
3328                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3329         if (test_kvm_facility(vcpu->kvm, 82))
3330                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3331         if (test_kvm_facility(vcpu->kvm, 133))
3332                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3333         if (test_kvm_facility(vcpu->kvm, 156))
3334                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3335         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3336          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3337          */
3338         if (MACHINE_HAS_VX)
3339                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3340         else
3341                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3342
3343         if (kvm_is_ucontrol(vcpu->kvm)) {
3344                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3345                 if (rc)
3346                         goto out_free_sie_block;
3347         }
3348
3349         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3350                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3351         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3352
3353         rc = kvm_s390_vcpu_setup(vcpu);
3354         if (rc)
3355                 goto out_ucontrol_uninit;
3356         return 0;
3357
3358 out_ucontrol_uninit:
3359         if (kvm_is_ucontrol(vcpu->kvm))
3360                 gmap_remove(vcpu->arch.gmap);
3361 out_free_sie_block:
3362         free_page((unsigned long)(vcpu->arch.sie_block));
3363         return rc;
3364 }
3365
3366 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3367 {
3368         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3369         return kvm_s390_vcpu_has_irq(vcpu, 0);
3370 }
3371
3372 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3373 {
3374         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3375 }
3376
3377 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3378 {
3379         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3380         exit_sie(vcpu);
3381 }
3382
3383 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3384 {
3385         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3386 }
3387
3388 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3389 {
3390         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3391         exit_sie(vcpu);
3392 }
3393
3394 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3395 {
3396         return atomic_read(&vcpu->arch.sie_block->prog20) &
3397                (PROG_BLOCK_SIE | PROG_REQUEST);
3398 }
3399
3400 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3401 {
3402         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3403 }
3404
3405 /*
3406  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3407  * If the CPU is not running (e.g. waiting as idle) the function will
3408  * return immediately. */
3409 void exit_sie(struct kvm_vcpu *vcpu)
3410 {
3411         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3412         kvm_s390_vsie_kick(vcpu);
3413         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3414                 cpu_relax();
3415 }
3416
3417 /* Kick a guest cpu out of SIE to process a request synchronously */
3418 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3419 {
3420         kvm_make_request(req, vcpu);
3421         kvm_s390_vcpu_request(vcpu);
3422 }
3423
3424 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3425                               unsigned long end)
3426 {
3427         struct kvm *kvm = gmap->private;
3428         struct kvm_vcpu *vcpu;
3429         unsigned long prefix;
3430         int i;
3431
3432         if (gmap_is_shadow(gmap))
3433                 return;
3434         if (start >= 1UL << 31)
3435                 /* We are only interested in prefix pages */
3436                 return;
3437         kvm_for_each_vcpu(i, vcpu, kvm) {
3438                 /* match against both prefix pages */
3439                 prefix = kvm_s390_get_prefix(vcpu);
3440                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3441                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3442                                    start, end);
3443                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3444                 }
3445         }
3446 }
3447
3448 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3449 {
3450         /* do not poll with more than halt_poll_max_steal percent of steal time */
3451         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3452             halt_poll_max_steal) {
3453                 vcpu->stat.halt_no_poll_steal++;
3454                 return true;
3455         }
3456         return false;
3457 }
3458
3459 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3460 {
3461         /* kvm common code refers to this, but never calls it */
3462         BUG();
3463         return 0;
3464 }
3465
3466 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3467                                            struct kvm_one_reg *reg)
3468 {
3469         int r = -EINVAL;
3470
3471         switch (reg->id) {
3472         case KVM_REG_S390_TODPR:
3473                 r = put_user(vcpu->arch.sie_block->todpr,
3474                              (u32 __user *)reg->addr);
3475                 break;
3476         case KVM_REG_S390_EPOCHDIFF:
3477                 r = put_user(vcpu->arch.sie_block->epoch,
3478                              (u64 __user *)reg->addr);
3479                 break;
3480         case KVM_REG_S390_CPU_TIMER:
3481                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3482                              (u64 __user *)reg->addr);
3483                 break;
3484         case KVM_REG_S390_CLOCK_COMP:
3485                 r = put_user(vcpu->arch.sie_block->ckc,
3486                              (u64 __user *)reg->addr);
3487                 break;
3488         case KVM_REG_S390_PFTOKEN:
3489                 r = put_user(vcpu->arch.pfault_token,
3490                              (u64 __user *)reg->addr);
3491                 break;
3492         case KVM_REG_S390_PFCOMPARE:
3493                 r = put_user(vcpu->arch.pfault_compare,
3494                              (u64 __user *)reg->addr);
3495                 break;
3496         case KVM_REG_S390_PFSELECT:
3497                 r = put_user(vcpu->arch.pfault_select,
3498                              (u64 __user *)reg->addr);
3499                 break;
3500         case KVM_REG_S390_PP:
3501                 r = put_user(vcpu->arch.sie_block->pp,
3502                              (u64 __user *)reg->addr);
3503                 break;
3504         case KVM_REG_S390_GBEA:
3505                 r = put_user(vcpu->arch.sie_block->gbea,
3506                              (u64 __user *)reg->addr);
3507                 break;
3508         default:
3509                 break;
3510         }
3511
3512         return r;
3513 }
3514
3515 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3516                                            struct kvm_one_reg *reg)
3517 {
3518         int r = -EINVAL;
3519         __u64 val;
3520
3521         switch (reg->id) {
3522         case KVM_REG_S390_TODPR:
3523                 r = get_user(vcpu->arch.sie_block->todpr,
3524                              (u32 __user *)reg->addr);
3525                 break;
3526         case KVM_REG_S390_EPOCHDIFF:
3527                 r = get_user(vcpu->arch.sie_block->epoch,
3528                              (u64 __user *)reg->addr);
3529                 break;
3530         case KVM_REG_S390_CPU_TIMER:
3531                 r = get_user(val, (u64 __user *)reg->addr);
3532                 if (!r)
3533                         kvm_s390_set_cpu_timer(vcpu, val);
3534                 break;
3535         case KVM_REG_S390_CLOCK_COMP:
3536                 r = get_user(vcpu->arch.sie_block->ckc,
3537                              (u64 __user *)reg->addr);
3538                 break;
3539         case KVM_REG_S390_PFTOKEN:
3540                 r = get_user(vcpu->arch.pfault_token,
3541                              (u64 __user *)reg->addr);
3542                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3543                         kvm_clear_async_pf_completion_queue(vcpu);
3544                 break;
3545         case KVM_REG_S390_PFCOMPARE:
3546                 r = get_user(vcpu->arch.pfault_compare,
3547                              (u64 __user *)reg->addr);
3548                 break;
3549         case KVM_REG_S390_PFSELECT:
3550                 r = get_user(vcpu->arch.pfault_select,
3551                              (u64 __user *)reg->addr);
3552                 break;
3553         case KVM_REG_S390_PP:
3554                 r = get_user(vcpu->arch.sie_block->pp,
3555                              (u64 __user *)reg->addr);
3556                 break;
3557         case KVM_REG_S390_GBEA:
3558                 r = get_user(vcpu->arch.sie_block->gbea,
3559                              (u64 __user *)reg->addr);
3560                 break;
3561         default:
3562                 break;
3563         }
3564
3565         return r;
3566 }
3567
3568 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3569 {
3570         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3571         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3572         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3573
3574         kvm_clear_async_pf_completion_queue(vcpu);
3575         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3576                 kvm_s390_vcpu_stop(vcpu);
3577         kvm_s390_clear_local_irqs(vcpu);
3578 }
3579
3580 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3581 {
3582         /* Initial reset is a superset of the normal reset */
3583         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3584
3585         /*
3586          * This equals initial cpu reset in pop, but we don't switch to ESA.
3587          * We do not only reset the internal data, but also ...
3588          */
3589         vcpu->arch.sie_block->gpsw.mask = 0;
3590         vcpu->arch.sie_block->gpsw.addr = 0;
3591         kvm_s390_set_prefix(vcpu, 0);
3592         kvm_s390_set_cpu_timer(vcpu, 0);
3593         vcpu->arch.sie_block->ckc = 0;
3594         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3595         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3596         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3597
3598         /* ... the data in sync regs */
3599         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3600         vcpu->run->s.regs.ckc = 0;
3601         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3602         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3603         vcpu->run->psw_addr = 0;
3604         vcpu->run->psw_mask = 0;
3605         vcpu->run->s.regs.todpr = 0;
3606         vcpu->run->s.regs.cputm = 0;
3607         vcpu->run->s.regs.ckc = 0;
3608         vcpu->run->s.regs.pp = 0;
3609         vcpu->run->s.regs.gbea = 1;
3610         vcpu->run->s.regs.fpc = 0;
3611         /*
3612          * Do not reset these registers in the protected case, as some of
3613          * them are overlayed and they are not accessible in this case
3614          * anyway.
3615          */
3616         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3617                 vcpu->arch.sie_block->gbea = 1;
3618                 vcpu->arch.sie_block->pp = 0;
3619                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3620                 vcpu->arch.sie_block->todpr = 0;
3621         }
3622 }
3623
3624 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3625 {
3626         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3627
3628         /* Clear reset is a superset of the initial reset */
3629         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3630
3631         memset(&regs->gprs, 0, sizeof(regs->gprs));
3632         memset(&regs->vrs, 0, sizeof(regs->vrs));
3633         memset(&regs->acrs, 0, sizeof(regs->acrs));
3634         memset(&regs->gscb, 0, sizeof(regs->gscb));
3635
3636         regs->etoken = 0;
3637         regs->etoken_extension = 0;
3638 }
3639
3640 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3641 {
3642         vcpu_load(vcpu);
3643         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3644         vcpu_put(vcpu);
3645         return 0;
3646 }
3647
3648 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3649 {
3650         vcpu_load(vcpu);
3651         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3652         vcpu_put(vcpu);
3653         return 0;
3654 }
3655
3656 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3657                                   struct kvm_sregs *sregs)
3658 {
3659         vcpu_load(vcpu);
3660
3661         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3662         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3663
3664         vcpu_put(vcpu);
3665         return 0;
3666 }
3667
3668 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3669                                   struct kvm_sregs *sregs)
3670 {
3671         vcpu_load(vcpu);
3672
3673         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3674         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3675
3676         vcpu_put(vcpu);
3677         return 0;
3678 }
3679
3680 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3681 {
3682         int ret = 0;
3683
3684         vcpu_load(vcpu);
3685
3686         if (test_fp_ctl(fpu->fpc)) {
3687                 ret = -EINVAL;
3688                 goto out;
3689         }
3690         vcpu->run->s.regs.fpc = fpu->fpc;
3691         if (MACHINE_HAS_VX)
3692                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3693                                  (freg_t *) fpu->fprs);
3694         else
3695                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3696
3697 out:
3698         vcpu_put(vcpu);
3699         return ret;
3700 }
3701
3702 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3703 {
3704         vcpu_load(vcpu);
3705
3706         /* make sure we have the latest values */
3707         save_fpu_regs();
3708         if (MACHINE_HAS_VX)
3709                 convert_vx_to_fp((freg_t *) fpu->fprs,
3710                                  (__vector128 *) vcpu->run->s.regs.vrs);
3711         else
3712                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3713         fpu->fpc = vcpu->run->s.regs.fpc;
3714
3715         vcpu_put(vcpu);
3716         return 0;
3717 }
3718
3719 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3720 {
3721         int rc = 0;
3722
3723         if (!is_vcpu_stopped(vcpu))
3724                 rc = -EBUSY;
3725         else {
3726                 vcpu->run->psw_mask = psw.mask;
3727                 vcpu->run->psw_addr = psw.addr;
3728         }
3729         return rc;
3730 }
3731
3732 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3733                                   struct kvm_translation *tr)
3734 {
3735         return -EINVAL; /* not implemented yet */
3736 }
3737
3738 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3739                               KVM_GUESTDBG_USE_HW_BP | \
3740                               KVM_GUESTDBG_ENABLE)
3741
3742 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3743                                         struct kvm_guest_debug *dbg)
3744 {
3745         int rc = 0;
3746
3747         vcpu_load(vcpu);
3748
3749         vcpu->guest_debug = 0;
3750         kvm_s390_clear_bp_data(vcpu);
3751
3752         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3753                 rc = -EINVAL;
3754                 goto out;
3755         }
3756         if (!sclp.has_gpere) {
3757                 rc = -EINVAL;
3758                 goto out;
3759         }
3760
3761         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3762                 vcpu->guest_debug = dbg->control;
3763                 /* enforce guest PER */
3764                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3765
3766                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3767                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3768         } else {
3769                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3770                 vcpu->arch.guestdbg.last_bp = 0;
3771         }
3772
3773         if (rc) {
3774                 vcpu->guest_debug = 0;
3775                 kvm_s390_clear_bp_data(vcpu);
3776                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3777         }
3778
3779 out:
3780         vcpu_put(vcpu);
3781         return rc;
3782 }
3783
3784 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3785                                     struct kvm_mp_state *mp_state)
3786 {
3787         int ret;
3788
3789         vcpu_load(vcpu);
3790
3791         /* CHECK_STOP and LOAD are not supported yet */
3792         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3793                                       KVM_MP_STATE_OPERATING;
3794
3795         vcpu_put(vcpu);
3796         return ret;
3797 }
3798
3799 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3800                                     struct kvm_mp_state *mp_state)
3801 {
3802         int rc = 0;
3803
3804         vcpu_load(vcpu);
3805
3806         /* user space knows about this interface - let it control the state */
3807         kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3808
3809         switch (mp_state->mp_state) {
3810         case KVM_MP_STATE_STOPPED:
3811                 rc = kvm_s390_vcpu_stop(vcpu);
3812                 break;
3813         case KVM_MP_STATE_OPERATING:
3814                 rc = kvm_s390_vcpu_start(vcpu);
3815                 break;
3816         case KVM_MP_STATE_LOAD:
3817                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3818                         rc = -ENXIO;
3819                         break;
3820                 }
3821                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3822                 break;
3823         case KVM_MP_STATE_CHECK_STOP:
3824                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3825         default:
3826                 rc = -ENXIO;
3827         }
3828
3829         vcpu_put(vcpu);
3830         return rc;
3831 }
3832
3833 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3834 {
3835         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3836 }
3837
3838 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3839 {
3840 retry:
3841         kvm_s390_vcpu_request_handled(vcpu);
3842         if (!kvm_request_pending(vcpu))
3843                 return 0;
3844         /*
3845          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3846          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3847          * This ensures that the ipte instruction for this request has
3848          * already finished. We might race against a second unmapper that
3849          * wants to set the blocking bit. Lets just retry the request loop.
3850          */
3851         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3852                 int rc;
3853                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3854                                           kvm_s390_get_prefix(vcpu),
3855                                           PAGE_SIZE * 2, PROT_WRITE);
3856                 if (rc) {
3857                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3858                         return rc;
3859                 }
3860                 goto retry;
3861         }
3862
3863         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3864                 vcpu->arch.sie_block->ihcpu = 0xffff;
3865                 goto retry;
3866         }
3867
3868         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3869                 if (!ibs_enabled(vcpu)) {
3870                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3871                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3872                 }
3873                 goto retry;
3874         }
3875
3876         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3877                 if (ibs_enabled(vcpu)) {
3878                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3879                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3880                 }
3881                 goto retry;
3882         }
3883
3884         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3885                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3886                 goto retry;
3887         }
3888
3889         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3890                 /*
3891                  * Disable CMM virtualization; we will emulate the ESSA
3892                  * instruction manually, in order to provide additional
3893                  * functionalities needed for live migration.
3894                  */
3895                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3896                 goto retry;
3897         }
3898
3899         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3900                 /*
3901                  * Re-enable CMM virtualization if CMMA is available and
3902                  * CMM has been used.
3903                  */
3904                 if ((vcpu->kvm->arch.use_cmma) &&
3905                     (vcpu->kvm->mm->context.uses_cmm))
3906                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3907                 goto retry;
3908         }
3909
3910         /* nothing to do, just clear the request */
3911         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3912         /* we left the vsie handler, nothing to do, just clear the request */
3913         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3914
3915         return 0;
3916 }
3917
3918 void kvm_s390_set_tod_clock(struct kvm *kvm,
3919                             const struct kvm_s390_vm_tod_clock *gtod)
3920 {
3921         struct kvm_vcpu *vcpu;
3922         union tod_clock clk;
3923         int i;
3924
3925         mutex_lock(&kvm->lock);
3926         preempt_disable();
3927
3928         store_tod_clock_ext(&clk);
3929
3930         kvm->arch.epoch = gtod->tod - clk.tod;
3931         kvm->arch.epdx = 0;
3932         if (test_kvm_facility(kvm, 139)) {
3933                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3934                 if (kvm->arch.epoch > gtod->tod)
3935                         kvm->arch.epdx -= 1;
3936         }
3937
3938         kvm_s390_vcpu_block_all(kvm);
3939         kvm_for_each_vcpu(i, vcpu, kvm) {
3940                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3941                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3942         }
3943
3944         kvm_s390_vcpu_unblock_all(kvm);
3945         preempt_enable();
3946         mutex_unlock(&kvm->lock);
3947 }
3948
3949 /**
3950  * kvm_arch_fault_in_page - fault-in guest page if necessary
3951  * @vcpu: The corresponding virtual cpu
3952  * @gpa: Guest physical address
3953  * @writable: Whether the page should be writable or not
3954  *
3955  * Make sure that a guest page has been faulted-in on the host.
3956  *
3957  * Return: Zero on success, negative error code otherwise.
3958  */
3959 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3960 {
3961         return gmap_fault(vcpu->arch.gmap, gpa,
3962                           writable ? FAULT_FLAG_WRITE : 0);
3963 }
3964
3965 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3966                                       unsigned long token)
3967 {
3968         struct kvm_s390_interrupt inti;
3969         struct kvm_s390_irq irq;
3970
3971         if (start_token) {
3972                 irq.u.ext.ext_params2 = token;
3973                 irq.type = KVM_S390_INT_PFAULT_INIT;
3974                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3975         } else {
3976                 inti.type = KVM_S390_INT_PFAULT_DONE;
3977                 inti.parm64 = token;
3978                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3979         }
3980 }
3981
3982 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3983                                      struct kvm_async_pf *work)
3984 {
3985         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3986         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3987
3988         return true;
3989 }
3990
3991 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3992                                  struct kvm_async_pf *work)
3993 {
3994         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3995         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3996 }
3997
3998 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3999                                struct kvm_async_pf *work)
4000 {
4001         /* s390 will always inject the page directly */
4002 }
4003
4004 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4005 {
4006         /*
4007          * s390 will always inject the page directly,
4008          * but we still want check_async_completion to cleanup
4009          */
4010         return true;
4011 }
4012
4013 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4014 {
4015         hva_t hva;
4016         struct kvm_arch_async_pf arch;
4017
4018         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4019                 return false;
4020         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4021             vcpu->arch.pfault_compare)
4022                 return false;
4023         if (psw_extint_disabled(vcpu))
4024                 return false;
4025         if (kvm_s390_vcpu_has_irq(vcpu, 0))
4026                 return false;
4027         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4028                 return false;
4029         if (!vcpu->arch.gmap->pfault_enabled)
4030                 return false;
4031
4032         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4033         hva += current->thread.gmap_addr & ~PAGE_MASK;
4034         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4035                 return false;
4036
4037         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4038 }
4039
4040 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4041 {
4042         int rc, cpuflags;
4043
4044         /*
4045          * On s390 notifications for arriving pages will be delivered directly
4046          * to the guest but the house keeping for completed pfaults is
4047          * handled outside the worker.
4048          */
4049         kvm_check_async_pf_completion(vcpu);
4050
4051         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4052         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4053
4054         if (need_resched())
4055                 schedule();
4056
4057         if (!kvm_is_ucontrol(vcpu->kvm)) {
4058                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4059                 if (rc)
4060                         return rc;
4061         }
4062
4063         rc = kvm_s390_handle_requests(vcpu);
4064         if (rc)
4065                 return rc;
4066
4067         if (guestdbg_enabled(vcpu)) {
4068                 kvm_s390_backup_guest_per_regs(vcpu);
4069                 kvm_s390_patch_guest_per_regs(vcpu);
4070         }
4071
4072         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4073
4074         vcpu->arch.sie_block->icptcode = 0;
4075         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4076         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4077         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4078
4079         return 0;
4080 }
4081
4082 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4083 {
4084         struct kvm_s390_pgm_info pgm_info = {
4085                 .code = PGM_ADDRESSING,
4086         };
4087         u8 opcode, ilen;
4088         int rc;
4089
4090         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4091         trace_kvm_s390_sie_fault(vcpu);
4092
4093         /*
4094          * We want to inject an addressing exception, which is defined as a
4095          * suppressing or terminating exception. However, since we came here
4096          * by a DAT access exception, the PSW still points to the faulting
4097          * instruction since DAT exceptions are nullifying. So we've got
4098          * to look up the current opcode to get the length of the instruction
4099          * to be able to forward the PSW.
4100          */
4101         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4102         ilen = insn_length(opcode);
4103         if (rc < 0) {
4104                 return rc;
4105         } else if (rc) {
4106                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4107                  * Forward by arbitrary ilc, injection will take care of
4108                  * nullification if necessary.
4109                  */
4110                 pgm_info = vcpu->arch.pgm;
4111                 ilen = 4;
4112         }
4113         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4114         kvm_s390_forward_psw(vcpu, ilen);
4115         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4116 }
4117
4118 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4119 {
4120         struct mcck_volatile_info *mcck_info;
4121         struct sie_page *sie_page;
4122
4123         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4124                    vcpu->arch.sie_block->icptcode);
4125         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4126
4127         if (guestdbg_enabled(vcpu))
4128                 kvm_s390_restore_guest_per_regs(vcpu);
4129
4130         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4131         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4132
4133         if (exit_reason == -EINTR) {
4134                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4135                 sie_page = container_of(vcpu->arch.sie_block,
4136                                         struct sie_page, sie_block);
4137                 mcck_info = &sie_page->mcck_info;
4138                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4139                 return 0;
4140         }
4141
4142         if (vcpu->arch.sie_block->icptcode > 0) {
4143                 int rc = kvm_handle_sie_intercept(vcpu);
4144
4145                 if (rc != -EOPNOTSUPP)
4146                         return rc;
4147                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4148                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4149                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4150                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4151                 return -EREMOTE;
4152         } else if (exit_reason != -EFAULT) {
4153                 vcpu->stat.exit_null++;
4154                 return 0;
4155         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4156                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4157                 vcpu->run->s390_ucontrol.trans_exc_code =
4158                                                 current->thread.gmap_addr;
4159                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4160                 return -EREMOTE;
4161         } else if (current->thread.gmap_pfault) {
4162                 trace_kvm_s390_major_guest_pfault(vcpu);
4163                 current->thread.gmap_pfault = 0;
4164                 if (kvm_arch_setup_async_pf(vcpu))
4165                         return 0;
4166                 vcpu->stat.pfault_sync++;
4167                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4168         }
4169         return vcpu_post_run_fault_in_sie(vcpu);
4170 }
4171
4172 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4173 static int __vcpu_run(struct kvm_vcpu *vcpu)
4174 {
4175         int rc, exit_reason;
4176         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4177
4178         /*
4179          * We try to hold kvm->srcu during most of vcpu_run (except when run-
4180          * ning the guest), so that memslots (and other stuff) are protected
4181          */
4182         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4183
4184         do {
4185                 rc = vcpu_pre_run(vcpu);
4186                 if (rc)
4187                         break;
4188
4189                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4190                 /*
4191                  * As PF_VCPU will be used in fault handler, between
4192                  * guest_enter and guest_exit should be no uaccess.
4193                  */
4194                 local_irq_disable();
4195                 guest_enter_irqoff();
4196                 __disable_cpu_timer_accounting(vcpu);
4197                 local_irq_enable();
4198                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4199                         memcpy(sie_page->pv_grregs,
4200                                vcpu->run->s.regs.gprs,
4201                                sizeof(sie_page->pv_grregs));
4202                 }
4203                 if (test_cpu_flag(CIF_FPU))
4204                         load_fpu_regs();
4205                 exit_reason = sie64a(vcpu->arch.sie_block,
4206                                      vcpu->run->s.regs.gprs);
4207                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4208                         memcpy(vcpu->run->s.regs.gprs,
4209                                sie_page->pv_grregs,
4210                                sizeof(sie_page->pv_grregs));
4211                         /*
4212                          * We're not allowed to inject interrupts on intercepts
4213                          * that leave the guest state in an "in-between" state
4214                          * where the next SIE entry will do a continuation.
4215                          * Fence interrupts in our "internal" PSW.
4216                          */
4217                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4218                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4219                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4220                         }
4221                 }
4222                 local_irq_disable();
4223                 __enable_cpu_timer_accounting(vcpu);
4224                 guest_exit_irqoff();
4225                 local_irq_enable();
4226                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4227
4228                 rc = vcpu_post_run(vcpu, exit_reason);
4229         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4230
4231         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4232         return rc;
4233 }
4234
4235 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4236 {
4237         struct kvm_run *kvm_run = vcpu->run;
4238         struct runtime_instr_cb *riccb;
4239         struct gs_cb *gscb;
4240
4241         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4242         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4243         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4244         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4245         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4246                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4247                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4248                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4249         }
4250         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4251                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4252                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4253                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4254                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4255                         kvm_clear_async_pf_completion_queue(vcpu);
4256         }
4257         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4258                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4259                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4260                 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4261         }
4262         /*
4263          * If userspace sets the riccb (e.g. after migration) to a valid state,
4264          * we should enable RI here instead of doing the lazy enablement.
4265          */
4266         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4267             test_kvm_facility(vcpu->kvm, 64) &&
4268             riccb->v &&
4269             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4270                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4271                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4272         }
4273         /*
4274          * If userspace sets the gscb (e.g. after migration) to non-zero,
4275          * we should enable GS here instead of doing the lazy enablement.
4276          */
4277         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4278             test_kvm_facility(vcpu->kvm, 133) &&
4279             gscb->gssm &&
4280             !vcpu->arch.gs_enabled) {
4281                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4282                 vcpu->arch.sie_block->ecb |= ECB_GS;
4283                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4284                 vcpu->arch.gs_enabled = 1;
4285         }
4286         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4287             test_kvm_facility(vcpu->kvm, 82)) {
4288                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4289                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4290         }
4291         if (MACHINE_HAS_GS) {
4292                 preempt_disable();
4293                 __ctl_set_bit(2, 4);
4294                 if (current->thread.gs_cb) {
4295                         vcpu->arch.host_gscb = current->thread.gs_cb;
4296                         save_gs_cb(vcpu->arch.host_gscb);
4297                 }
4298                 if (vcpu->arch.gs_enabled) {
4299                         current->thread.gs_cb = (struct gs_cb *)
4300                                                 &vcpu->run->s.regs.gscb;
4301                         restore_gs_cb(current->thread.gs_cb);
4302                 }
4303                 preempt_enable();
4304         }
4305         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4306 }
4307
4308 static void sync_regs(struct kvm_vcpu *vcpu)
4309 {
4310         struct kvm_run *kvm_run = vcpu->run;
4311
4312         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4313                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4314         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4315                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4316                 /* some control register changes require a tlb flush */
4317                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4318         }
4319         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4320                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4321                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4322         }
4323         save_access_regs(vcpu->arch.host_acrs);
4324         restore_access_regs(vcpu->run->s.regs.acrs);
4325         /* save host (userspace) fprs/vrs */
4326         save_fpu_regs();
4327         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4328         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4329         if (MACHINE_HAS_VX)
4330                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4331         else
4332                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4333         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4334         if (test_fp_ctl(current->thread.fpu.fpc))
4335                 /* User space provided an invalid FPC, let's clear it */
4336                 current->thread.fpu.fpc = 0;
4337
4338         /* Sync fmt2 only data */
4339         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4340                 sync_regs_fmt2(vcpu);
4341         } else {
4342                 /*
4343                  * In several places we have to modify our internal view to
4344                  * not do things that are disallowed by the ultravisor. For
4345                  * example we must not inject interrupts after specific exits
4346                  * (e.g. 112 prefix page not secure). We do this by turning
4347                  * off the machine check, external and I/O interrupt bits
4348                  * of our PSW copy. To avoid getting validity intercepts, we
4349                  * do only accept the condition code from userspace.
4350                  */
4351                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4352                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4353                                                    PSW_MASK_CC;
4354         }
4355
4356         kvm_run->kvm_dirty_regs = 0;
4357 }
4358
4359 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4360 {
4361         struct kvm_run *kvm_run = vcpu->run;
4362
4363         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4364         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4365         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4366         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4367         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4368         if (MACHINE_HAS_GS) {
4369                 preempt_disable();
4370                 __ctl_set_bit(2, 4);
4371                 if (vcpu->arch.gs_enabled)
4372                         save_gs_cb(current->thread.gs_cb);
4373                 current->thread.gs_cb = vcpu->arch.host_gscb;
4374                 restore_gs_cb(vcpu->arch.host_gscb);
4375                 if (!vcpu->arch.host_gscb)
4376                         __ctl_clear_bit(2, 4);
4377                 vcpu->arch.host_gscb = NULL;
4378                 preempt_enable();
4379         }
4380         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4381 }
4382
4383 static void store_regs(struct kvm_vcpu *vcpu)
4384 {
4385         struct kvm_run *kvm_run = vcpu->run;
4386
4387         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4388         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4389         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4390         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4391         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4392         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4393         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4394         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4395         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4396         save_access_regs(vcpu->run->s.regs.acrs);
4397         restore_access_regs(vcpu->arch.host_acrs);
4398         /* Save guest register state */
4399         save_fpu_regs();
4400         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4401         /* Restore will be done lazily at return */
4402         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4403         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4404         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4405                 store_regs_fmt2(vcpu);
4406 }
4407
4408 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4409 {
4410         struct kvm_run *kvm_run = vcpu->run;
4411         int rc;
4412
4413         if (kvm_run->immediate_exit)
4414                 return -EINTR;
4415
4416         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4417             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4418                 return -EINVAL;
4419
4420         vcpu_load(vcpu);
4421
4422         if (guestdbg_exit_pending(vcpu)) {
4423                 kvm_s390_prepare_debug_exit(vcpu);
4424                 rc = 0;
4425                 goto out;
4426         }
4427
4428         kvm_sigset_activate(vcpu);
4429
4430         /*
4431          * no need to check the return value of vcpu_start as it can only have
4432          * an error for protvirt, but protvirt means user cpu state
4433          */
4434         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4435                 kvm_s390_vcpu_start(vcpu);
4436         } else if (is_vcpu_stopped(vcpu)) {
4437                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4438                                    vcpu->vcpu_id);
4439                 rc = -EINVAL;
4440                 goto out;
4441         }
4442
4443         sync_regs(vcpu);
4444         enable_cpu_timer_accounting(vcpu);
4445
4446         might_fault();
4447         rc = __vcpu_run(vcpu);
4448
4449         if (signal_pending(current) && !rc) {
4450                 kvm_run->exit_reason = KVM_EXIT_INTR;
4451                 rc = -EINTR;
4452         }
4453
4454         if (guestdbg_exit_pending(vcpu) && !rc)  {
4455                 kvm_s390_prepare_debug_exit(vcpu);
4456                 rc = 0;
4457         }
4458
4459         if (rc == -EREMOTE) {
4460                 /* userspace support is needed, kvm_run has been prepared */
4461                 rc = 0;
4462         }
4463
4464         disable_cpu_timer_accounting(vcpu);
4465         store_regs(vcpu);
4466
4467         kvm_sigset_deactivate(vcpu);
4468
4469         vcpu->stat.exit_userspace++;
4470 out:
4471         vcpu_put(vcpu);
4472         return rc;
4473 }
4474
4475 /*
4476  * store status at address
4477  * we use have two special cases:
4478  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4479  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4480  */
4481 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4482 {
4483         unsigned char archmode = 1;
4484         freg_t fprs[NUM_FPRS];
4485         unsigned int px;
4486         u64 clkcomp, cputm;
4487         int rc;
4488
4489         px = kvm_s390_get_prefix(vcpu);
4490         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4491                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4492                         return -EFAULT;
4493                 gpa = 0;
4494         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4495                 if (write_guest_real(vcpu, 163, &archmode, 1))
4496                         return -EFAULT;
4497                 gpa = px;
4498         } else
4499                 gpa -= __LC_FPREGS_SAVE_AREA;
4500
4501         /* manually convert vector registers if necessary */
4502         if (MACHINE_HAS_VX) {
4503                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4504                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4505                                      fprs, 128);
4506         } else {
4507                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4508                                      vcpu->run->s.regs.fprs, 128);
4509         }
4510         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4511                               vcpu->run->s.regs.gprs, 128);
4512         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4513                               &vcpu->arch.sie_block->gpsw, 16);
4514         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4515                               &px, 4);
4516         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4517                               &vcpu->run->s.regs.fpc, 4);
4518         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4519                               &vcpu->arch.sie_block->todpr, 4);
4520         cputm = kvm_s390_get_cpu_timer(vcpu);
4521         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4522                               &cputm, 8);
4523         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4524         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4525                               &clkcomp, 8);
4526         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4527                               &vcpu->run->s.regs.acrs, 64);
4528         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4529                               &vcpu->arch.sie_block->gcr, 128);
4530         return rc ? -EFAULT : 0;
4531 }
4532
4533 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4534 {
4535         /*
4536          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4537          * switch in the run ioctl. Let's update our copies before we save
4538          * it into the save area
4539          */
4540         save_fpu_regs();
4541         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4542         save_access_regs(vcpu->run->s.regs.acrs);
4543
4544         return kvm_s390_store_status_unloaded(vcpu, addr);
4545 }
4546
4547 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4548 {
4549         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4550         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4551 }
4552
4553 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4554 {
4555         unsigned int i;
4556         struct kvm_vcpu *vcpu;
4557
4558         kvm_for_each_vcpu(i, vcpu, kvm) {
4559                 __disable_ibs_on_vcpu(vcpu);
4560         }
4561 }
4562
4563 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4564 {
4565         if (!sclp.has_ibs)
4566                 return;
4567         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4568         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4569 }
4570
4571 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4572 {
4573         int i, online_vcpus, r = 0, started_vcpus = 0;
4574
4575         if (!is_vcpu_stopped(vcpu))
4576                 return 0;
4577
4578         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4579         /* Only one cpu at a time may enter/leave the STOPPED state. */
4580         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4581         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4582
4583         /* Let's tell the UV that we want to change into the operating state */
4584         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4585                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4586                 if (r) {
4587                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4588                         return r;
4589                 }
4590         }
4591
4592         for (i = 0; i < online_vcpus; i++) {
4593                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4594                         started_vcpus++;
4595         }
4596
4597         if (started_vcpus == 0) {
4598                 /* we're the only active VCPU -> speed it up */
4599                 __enable_ibs_on_vcpu(vcpu);
4600         } else if (started_vcpus == 1) {
4601                 /*
4602                  * As we are starting a second VCPU, we have to disable
4603                  * the IBS facility on all VCPUs to remove potentially
4604                  * outstanding ENABLE requests.
4605                  */
4606                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4607         }
4608
4609         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4610         /*
4611          * The real PSW might have changed due to a RESTART interpreted by the
4612          * ultravisor. We block all interrupts and let the next sie exit
4613          * refresh our view.
4614          */
4615         if (kvm_s390_pv_cpu_is_protected(vcpu))
4616                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4617         /*
4618          * Another VCPU might have used IBS while we were offline.
4619          * Let's play safe and flush the VCPU at startup.
4620          */
4621         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4622         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4623         return 0;
4624 }
4625
4626 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4627 {
4628         int i, online_vcpus, r = 0, started_vcpus = 0;
4629         struct kvm_vcpu *started_vcpu = NULL;
4630
4631         if (is_vcpu_stopped(vcpu))
4632                 return 0;
4633
4634         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4635         /* Only one cpu at a time may enter/leave the STOPPED state. */
4636         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4637         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4638
4639         /* Let's tell the UV that we want to change into the stopped state */
4640         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4641                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4642                 if (r) {
4643                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4644                         return r;
4645                 }
4646         }
4647
4648         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4649         kvm_s390_clear_stop_irq(vcpu);
4650
4651         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4652         __disable_ibs_on_vcpu(vcpu);
4653
4654         for (i = 0; i < online_vcpus; i++) {
4655                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4656                         started_vcpus++;
4657                         started_vcpu = vcpu->kvm->vcpus[i];
4658                 }
4659         }
4660
4661         if (started_vcpus == 1) {
4662                 /*
4663                  * As we only have one VCPU left, we want to enable the
4664                  * IBS facility for that VCPU to speed it up.
4665                  */
4666                 __enable_ibs_on_vcpu(started_vcpu);
4667         }
4668
4669         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4670         return 0;
4671 }
4672
4673 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4674                                      struct kvm_enable_cap *cap)
4675 {
4676         int r;
4677
4678         if (cap->flags)
4679                 return -EINVAL;
4680
4681         switch (cap->cap) {
4682         case KVM_CAP_S390_CSS_SUPPORT:
4683                 if (!vcpu->kvm->arch.css_support) {
4684                         vcpu->kvm->arch.css_support = 1;
4685                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4686                         trace_kvm_s390_enable_css(vcpu->kvm);
4687                 }
4688                 r = 0;
4689                 break;
4690         default:
4691                 r = -EINVAL;
4692                 break;
4693         }
4694         return r;
4695 }
4696
4697 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4698                                    struct kvm_s390_mem_op *mop)
4699 {
4700         void __user *uaddr = (void __user *)mop->buf;
4701         int r = 0;
4702
4703         if (mop->flags || !mop->size)
4704                 return -EINVAL;
4705         if (mop->size + mop->sida_offset < mop->size)
4706                 return -EINVAL;
4707         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4708                 return -E2BIG;
4709
4710         switch (mop->op) {
4711         case KVM_S390_MEMOP_SIDA_READ:
4712                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4713                                  mop->sida_offset), mop->size))
4714                         r = -EFAULT;
4715
4716                 break;
4717         case KVM_S390_MEMOP_SIDA_WRITE:
4718                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4719                                    mop->sida_offset), uaddr, mop->size))
4720                         r = -EFAULT;
4721                 break;
4722         }
4723         return r;
4724 }
4725 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4726                                   struct kvm_s390_mem_op *mop)
4727 {
4728         void __user *uaddr = (void __user *)mop->buf;
4729         void *tmpbuf = NULL;
4730         int r = 0;
4731         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4732                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4733
4734         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4735                 return -EINVAL;
4736
4737         if (mop->size > MEM_OP_MAX_SIZE)
4738                 return -E2BIG;
4739
4740         if (kvm_s390_pv_cpu_is_protected(vcpu))
4741                 return -EINVAL;
4742
4743         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4744                 tmpbuf = vmalloc(mop->size);
4745                 if (!tmpbuf)
4746                         return -ENOMEM;
4747         }
4748
4749         switch (mop->op) {
4750         case KVM_S390_MEMOP_LOGICAL_READ:
4751                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4752                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4753                                             mop->size, GACC_FETCH);
4754                         break;
4755                 }
4756                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4757                 if (r == 0) {
4758                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4759                                 r = -EFAULT;
4760                 }
4761                 break;
4762         case KVM_S390_MEMOP_LOGICAL_WRITE:
4763                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4764                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4765                                             mop->size, GACC_STORE);
4766                         break;
4767                 }
4768                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4769                         r = -EFAULT;
4770                         break;
4771                 }
4772                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4773                 break;
4774         }
4775
4776         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4777                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4778
4779         vfree(tmpbuf);
4780         return r;
4781 }
4782
4783 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4784                                       struct kvm_s390_mem_op *mop)
4785 {
4786         int r, srcu_idx;
4787
4788         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4789
4790         switch (mop->op) {
4791         case KVM_S390_MEMOP_LOGICAL_READ:
4792         case KVM_S390_MEMOP_LOGICAL_WRITE:
4793                 r = kvm_s390_guest_mem_op(vcpu, mop);
4794                 break;
4795         case KVM_S390_MEMOP_SIDA_READ:
4796         case KVM_S390_MEMOP_SIDA_WRITE:
4797                 /* we are locked against sida going away by the vcpu->mutex */
4798                 r = kvm_s390_guest_sida_op(vcpu, mop);
4799                 break;
4800         default:
4801                 r = -EINVAL;
4802         }
4803
4804         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4805         return r;
4806 }
4807
4808 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4809                                unsigned int ioctl, unsigned long arg)
4810 {
4811         struct kvm_vcpu *vcpu = filp->private_data;
4812         void __user *argp = (void __user *)arg;
4813
4814         switch (ioctl) {
4815         case KVM_S390_IRQ: {
4816                 struct kvm_s390_irq s390irq;
4817
4818                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4819                         return -EFAULT;
4820                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4821         }
4822         case KVM_S390_INTERRUPT: {
4823                 struct kvm_s390_interrupt s390int;
4824                 struct kvm_s390_irq s390irq = {};
4825
4826                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4827                         return -EFAULT;
4828                 if (s390int_to_s390irq(&s390int, &s390irq))
4829                         return -EINVAL;
4830                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4831         }
4832         }
4833         return -ENOIOCTLCMD;
4834 }
4835
4836 long kvm_arch_vcpu_ioctl(struct file *filp,
4837                          unsigned int ioctl, unsigned long arg)
4838 {
4839         struct kvm_vcpu *vcpu = filp->private_data;
4840         void __user *argp = (void __user *)arg;
4841         int idx;
4842         long r;
4843         u16 rc, rrc;
4844
4845         vcpu_load(vcpu);
4846
4847         switch (ioctl) {
4848         case KVM_S390_STORE_STATUS:
4849                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4850                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4851                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4852                 break;
4853         case KVM_S390_SET_INITIAL_PSW: {
4854                 psw_t psw;
4855
4856                 r = -EFAULT;
4857                 if (copy_from_user(&psw, argp, sizeof(psw)))
4858                         break;
4859                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4860                 break;
4861         }
4862         case KVM_S390_CLEAR_RESET:
4863                 r = 0;
4864                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4865                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4866                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4867                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4868                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4869                                    rc, rrc);
4870                 }
4871                 break;
4872         case KVM_S390_INITIAL_RESET:
4873                 r = 0;
4874                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4875                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4876                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4877                                           UVC_CMD_CPU_RESET_INITIAL,
4878                                           &rc, &rrc);
4879                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4880                                    rc, rrc);
4881                 }
4882                 break;
4883         case KVM_S390_NORMAL_RESET:
4884                 r = 0;
4885                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4886                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4887                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4888                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4889                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4890                                    rc, rrc);
4891                 }
4892                 break;
4893         case KVM_SET_ONE_REG:
4894         case KVM_GET_ONE_REG: {
4895                 struct kvm_one_reg reg;
4896                 r = -EINVAL;
4897                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4898                         break;
4899                 r = -EFAULT;
4900                 if (copy_from_user(&reg, argp, sizeof(reg)))
4901                         break;
4902                 if (ioctl == KVM_SET_ONE_REG)
4903                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4904                 else
4905                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4906                 break;
4907         }
4908 #ifdef CONFIG_KVM_S390_UCONTROL
4909         case KVM_S390_UCAS_MAP: {
4910                 struct kvm_s390_ucas_mapping ucasmap;
4911
4912                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4913                         r = -EFAULT;
4914                         break;
4915                 }
4916
4917                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4918                         r = -EINVAL;
4919                         break;
4920                 }
4921
4922                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4923                                      ucasmap.vcpu_addr, ucasmap.length);
4924                 break;
4925         }
4926         case KVM_S390_UCAS_UNMAP: {
4927                 struct kvm_s390_ucas_mapping ucasmap;
4928
4929                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4930                         r = -EFAULT;
4931                         break;
4932                 }
4933
4934                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4935                         r = -EINVAL;
4936                         break;
4937                 }
4938
4939                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4940                         ucasmap.length);
4941                 break;
4942         }
4943 #endif
4944         case KVM_S390_VCPU_FAULT: {
4945                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4946                 break;
4947         }
4948         case KVM_ENABLE_CAP:
4949         {
4950                 struct kvm_enable_cap cap;
4951                 r = -EFAULT;
4952                 if (copy_from_user(&cap, argp, sizeof(cap)))
4953                         break;
4954                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4955                 break;
4956         }
4957         case KVM_S390_MEM_OP: {
4958                 struct kvm_s390_mem_op mem_op;
4959
4960                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4961                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4962                 else
4963                         r = -EFAULT;
4964                 break;
4965         }
4966         case KVM_S390_SET_IRQ_STATE: {
4967                 struct kvm_s390_irq_state irq_state;
4968
4969                 r = -EFAULT;
4970                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4971                         break;
4972                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4973                     irq_state.len == 0 ||
4974                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4975                         r = -EINVAL;
4976                         break;
4977                 }
4978                 /* do not use irq_state.flags, it will break old QEMUs */
4979                 r = kvm_s390_set_irq_state(vcpu,
4980                                            (void __user *) irq_state.buf,
4981                                            irq_state.len);
4982                 break;
4983         }
4984         case KVM_S390_GET_IRQ_STATE: {
4985                 struct kvm_s390_irq_state irq_state;
4986
4987                 r = -EFAULT;
4988                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4989                         break;
4990                 if (irq_state.len == 0) {
4991                         r = -EINVAL;
4992                         break;
4993                 }
4994                 /* do not use irq_state.flags, it will break old QEMUs */
4995                 r = kvm_s390_get_irq_state(vcpu,
4996                                            (__u8 __user *)  irq_state.buf,
4997                                            irq_state.len);
4998                 break;
4999         }
5000         default:
5001                 r = -ENOTTY;
5002         }
5003
5004         vcpu_put(vcpu);
5005         return r;
5006 }
5007
5008 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5009 {
5010 #ifdef CONFIG_KVM_S390_UCONTROL
5011         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5012                  && (kvm_is_ucontrol(vcpu->kvm))) {
5013                 vmf->page = virt_to_page(vcpu->arch.sie_block);
5014                 get_page(vmf->page);
5015                 return 0;
5016         }
5017 #endif
5018         return VM_FAULT_SIGBUS;
5019 }
5020
5021 /* Section: memory related */
5022 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5023                                    struct kvm_memory_slot *memslot,
5024                                    const struct kvm_userspace_memory_region *mem,
5025                                    enum kvm_mr_change change)
5026 {
5027         /* A few sanity checks. We can have memory slots which have to be
5028            located/ended at a segment boundary (1MB). The memory in userland is
5029            ok to be fragmented into various different vmas. It is okay to mmap()
5030            and munmap() stuff in this slot after doing this call at any time */
5031
5032         if (mem->userspace_addr & 0xffffful)
5033                 return -EINVAL;
5034
5035         if (mem->memory_size & 0xffffful)
5036                 return -EINVAL;
5037
5038         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5039                 return -EINVAL;
5040
5041         /* When we are protected, we should not change the memory slots */
5042         if (kvm_s390_pv_get_handle(kvm))
5043                 return -EINVAL;
5044         return 0;
5045 }
5046
5047 void kvm_arch_commit_memory_region(struct kvm *kvm,
5048                                 const struct kvm_userspace_memory_region *mem,
5049                                 struct kvm_memory_slot *old,
5050                                 const struct kvm_memory_slot *new,
5051                                 enum kvm_mr_change change)
5052 {
5053         int rc = 0;
5054
5055         switch (change) {
5056         case KVM_MR_DELETE:
5057                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5058                                         old->npages * PAGE_SIZE);
5059                 break;
5060         case KVM_MR_MOVE:
5061                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5062                                         old->npages * PAGE_SIZE);
5063                 if (rc)
5064                         break;
5065                 fallthrough;
5066         case KVM_MR_CREATE:
5067                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5068                                       mem->guest_phys_addr, mem->memory_size);
5069                 break;
5070         case KVM_MR_FLAGS_ONLY:
5071                 break;
5072         default:
5073                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5074         }
5075         if (rc)
5076                 pr_warn("failed to commit memory region\n");
5077         return;
5078 }
5079
5080 static inline unsigned long nonhyp_mask(int i)
5081 {
5082         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5083
5084         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5085 }
5086
5087 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5088 {
5089         vcpu->valid_wakeup = false;
5090 }
5091
5092 static int __init kvm_s390_init(void)
5093 {
5094         int i;
5095
5096         if (!sclp.has_sief2) {
5097                 pr_info("SIE is not available\n");
5098                 return -ENODEV;
5099         }
5100
5101         if (nested && hpage) {
5102                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5103                 return -EINVAL;
5104         }
5105
5106         for (i = 0; i < 16; i++)
5107                 kvm_s390_fac_base[i] |=
5108                         stfle_fac_list[i] & nonhyp_mask(i);
5109
5110         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5111 }
5112
5113 static void __exit kvm_s390_exit(void)
5114 {
5115         kvm_exit();
5116 }
5117
5118 module_init(kvm_s390_init);
5119 module_exit(kvm_s390_exit);
5120
5121 /*
5122  * Enable autoloading of the kvm module.
5123  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5124  * since x86 takes a different approach.
5125  */
5126 #include <linux/miscdevice.h>
5127 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5128 MODULE_ALIAS("devname:kvm");