1 // SPDX-License-Identifier: GPL-2.0
3 * hosting IBM Z kernel virtual machines (s390x)
5 * Copyright IBM Corp. 2008, 2020
13 #define KMSG_COMPONENT "kvm-s390"
14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 #include <linux/compiler.h>
17 #include <linux/err.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/mman.h>
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/pgtable.h>
34 #include <linux/mmu_notifier.h>
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
48 #include <asm/fpu/api.h>
53 #define CREATE_TRACE_POINTS
55 #include "trace-s390.h"
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
62 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
63 KVM_GENERIC_VM_STATS(),
64 STATS_DESC_COUNTER(VM, inject_io),
65 STATS_DESC_COUNTER(VM, inject_float_mchk),
66 STATS_DESC_COUNTER(VM, inject_pfault_done),
67 STATS_DESC_COUNTER(VM, inject_service_signal),
68 STATS_DESC_COUNTER(VM, inject_virtio),
69 STATS_DESC_COUNTER(VM, aen_forward)
72 const struct kvm_stats_header kvm_vm_stats_header = {
73 .name_size = KVM_STATS_NAME_SIZE,
74 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
75 .id_offset = sizeof(struct kvm_stats_header),
76 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
77 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
78 sizeof(kvm_vm_stats_desc),
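/*
 * The offsets above describe the binary layout of the stats file descriptor
 * that userspace obtains via KVM_GET_STATS_FD and then reads sequentially:
 *
 *   [ kvm_stats_header | id string (KVM_STATS_NAME_SIZE bytes) |
 *     descriptor array | counter data ]
 *
 * i.e. the id starts right after the header, the descriptors right after
 * the id, and the counter values right after the descriptor array.
 */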
81 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
82 KVM_GENERIC_VCPU_STATS(),
83 STATS_DESC_COUNTER(VCPU, exit_userspace),
84 STATS_DESC_COUNTER(VCPU, exit_null),
85 STATS_DESC_COUNTER(VCPU, exit_external_request),
86 STATS_DESC_COUNTER(VCPU, exit_io_request),
87 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
88 STATS_DESC_COUNTER(VCPU, exit_stop_request),
89 STATS_DESC_COUNTER(VCPU, exit_validity),
90 STATS_DESC_COUNTER(VCPU, exit_instruction),
91 STATS_DESC_COUNTER(VCPU, exit_pei),
92 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
93 STATS_DESC_COUNTER(VCPU, instruction_lctl),
94 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
95 STATS_DESC_COUNTER(VCPU, instruction_stctl),
96 STATS_DESC_COUNTER(VCPU, instruction_stctg),
97 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
98 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
99 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
100 STATS_DESC_COUNTER(VCPU, deliver_ckc),
101 STATS_DESC_COUNTER(VCPU, deliver_cputm),
102 STATS_DESC_COUNTER(VCPU, deliver_external_call),
103 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
104 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
105 STATS_DESC_COUNTER(VCPU, deliver_virtio),
106 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
107 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
108 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
109 STATS_DESC_COUNTER(VCPU, deliver_program),
110 STATS_DESC_COUNTER(VCPU, deliver_io),
111 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
112 STATS_DESC_COUNTER(VCPU, exit_wait_state),
113 STATS_DESC_COUNTER(VCPU, inject_ckc),
114 STATS_DESC_COUNTER(VCPU, inject_cputm),
115 STATS_DESC_COUNTER(VCPU, inject_external_call),
116 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
117 STATS_DESC_COUNTER(VCPU, inject_mchk),
118 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
119 STATS_DESC_COUNTER(VCPU, inject_program),
120 STATS_DESC_COUNTER(VCPU, inject_restart),
121 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
122 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
123 STATS_DESC_COUNTER(VCPU, instruction_epsw),
124 STATS_DESC_COUNTER(VCPU, instruction_gs),
125 STATS_DESC_COUNTER(VCPU, instruction_io_other),
126 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
127 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
128 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
129 STATS_DESC_COUNTER(VCPU, instruction_ptff),
130 STATS_DESC_COUNTER(VCPU, instruction_sck),
131 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
132 STATS_DESC_COUNTER(VCPU, instruction_stidp),
133 STATS_DESC_COUNTER(VCPU, instruction_spx),
134 STATS_DESC_COUNTER(VCPU, instruction_stpx),
135 STATS_DESC_COUNTER(VCPU, instruction_stap),
136 STATS_DESC_COUNTER(VCPU, instruction_iske),
137 STATS_DESC_COUNTER(VCPU, instruction_ri),
138 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
139 STATS_DESC_COUNTER(VCPU, instruction_sske),
140 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
141 STATS_DESC_COUNTER(VCPU, instruction_stsi),
142 STATS_DESC_COUNTER(VCPU, instruction_stfl),
143 STATS_DESC_COUNTER(VCPU, instruction_tb),
144 STATS_DESC_COUNTER(VCPU, instruction_tpi),
145 STATS_DESC_COUNTER(VCPU, instruction_tprot),
146 STATS_DESC_COUNTER(VCPU, instruction_tsch),
147 STATS_DESC_COUNTER(VCPU, instruction_sie),
148 STATS_DESC_COUNTER(VCPU, instruction_essa),
149 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
150 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
151 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
152 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
153 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
154 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
155 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
156 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
157 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
158 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
164 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
165 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
167 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
168 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
169 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
170 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
173 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
174 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
175 STATS_DESC_COUNTER(VCPU, pfault_sync)
178 const struct kvm_stats_header kvm_vcpu_stats_header = {
179 .name_size = KVM_STATS_NAME_SIZE,
180 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
181 .id_offset = sizeof(struct kvm_stats_header),
182 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
183 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
184 sizeof(kvm_vcpu_stats_desc),
187 /* allow nested virtualization in KVM (if enabled by user space) */
189 module_param(nested, int, S_IRUGO);
190 MODULE_PARM_DESC(nested, "Nested virtualization support");
192 /* allow 1m huge page guest backing, if !nested */
194 module_param(hpage, int, 0444);
195 MODULE_PARM_DESC(hpage, "1m huge page backing support");
197 /* maximum percentage of steal time for polling. >100 is treated like 100 */
198 static u8 halt_poll_max_steal = 10;
199 module_param(halt_poll_max_steal, byte, 0644);
200 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
202 /* if set to true, the GISA will be initialized and used if available */
203 static bool use_gisa = true;
204 module_param(use_gisa, bool, 0644);
205 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
207 /* maximum diag9c forwarding per second */
208 unsigned int diag9c_forwarding_hz;
209 module_param(diag9c_forwarding_hz, uint, 0644);
210 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
213 * allow asynchronous deinit for protected guests; enable by default since
214 * the feature is opt-in anyway
216 static int async_destroy = 1;
217 module_param(async_destroy, int, 0444);
218 MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
221 * For now we handle at most 16 double words as this is what the s390 base
222 * kernel handles and stores in the prefix page. If we ever need to go beyond
223 * this, this requires changes to code, but the external uapi can stay.
225 #define SIZE_INTERNAL 16
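/* i.e. 16 doublewords of 64 bits each: facility bits 0-1023. */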
228 * Base feature mask that defines default mask for facilities. Consists of the
229 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
231 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
233 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
234 * and defines the facilities that can be enabled via a cpu model.
236 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
238 static unsigned long kvm_s390_fac_size(void)
240 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
241 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
242 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
243 sizeof(stfle_fac_list));
245 return SIZE_INTERNAL;
248 /* available cpu features supported by kvm */
249 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
250 /* available subfunctions indicated via query / "test bit" */
251 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
253 static struct gmap_notifier gmap_notifier;
254 static struct gmap_notifier vsie_gmap_notifier;
255 debug_info_t *kvm_s390_dbf;
256 debug_info_t *kvm_s390_dbf_uv;
258 /* Section: not file related */
259 int kvm_arch_hardware_enable(void)
261 /* every s390 is virtualization enabled ;-) */
265 int kvm_arch_check_processor_compat(void *opaque)
270 /* forward declarations */
271 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
273 static int sca_switch_to_extended(struct kvm *kvm);
275 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
280 * The TOD jumps by delta, we have to compensate this by adding
281 * -delta to the epoch.
285 /* sign-extension - we're adding to signed values below */
290 if (scb->ecd & ECD_MEF) {
291 scb->epdx += delta_idx;
292 if (scb->epoch < delta)
298 * This callback is executed during stop_machine(). All CPUs are therefore
299 * temporarily stopped. In order not to change guest behavior, we have to
300 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
301 * so a CPU won't be stopped while calculating with the epoch.
303 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
307 struct kvm_vcpu *vcpu;
309 unsigned long long *delta = v;
311 list_for_each_entry(kvm, &vm_list, vm_list) {
312 kvm_for_each_vcpu(i, vcpu, kvm) {
313 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
315 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
316 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
318 if (vcpu->arch.cputm_enabled)
319 vcpu->arch.cputm_start += *delta;
320 if (vcpu->arch.vsie_block)
321 kvm_clock_sync_scb(vcpu->arch.vsie_block,
328 static struct notifier_block kvm_clock_notifier = {
329 .notifier_call = kvm_clock_sync,
332 int kvm_arch_hardware_setup(void *opaque)
334 gmap_notifier.notifier_call = kvm_gmap_notifier;
335 gmap_register_pte_notifier(&gmap_notifier);
336 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
337 gmap_register_pte_notifier(&vsie_gmap_notifier);
338 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
339 &kvm_clock_notifier);
343 void kvm_arch_hardware_unsetup(void)
345 gmap_unregister_pte_notifier(&gmap_notifier);
346 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
347 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
348 &kvm_clock_notifier);
351 static void allow_cpu_feat(unsigned long nr)
353 set_bit_inv(nr, kvm_s390_available_cpu_feat);
356 static inline int plo_test_bit(unsigned char nr)
358 unsigned long function = (unsigned long)nr | 0x100;
362 " lgr 0,%[function]\n"
363 /* Parameter registers are ignored for "test bit" */
368 : [function] "d" (function)
373 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
378 /* Parameter registers are ignored */
379 " .insn rrf,%[opc] << 16,2,4,6,0\n"
381 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
382 : "cc", "memory", "0", "1");
385 #define INSN_SORTL 0xb938
386 #define INSN_DFLTCC 0xb939
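/*
 * Note on the query pattern above: as with the CPACF queries, the intent is
 * to execute the instruction with general register 0 selecting the query
 * function and general register 1 pointing at the caller's buffer, so that
 * a bitmask of the supported subfunctions is stored there; the R1/R2/R3
 * fields encoded as 2, 4 and 6 are irrelevant for the query. SORTL and
 * DFLTCC are queried this way because they are not CPACF instructions.
 */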
388 static void kvm_s390_cpu_feat_init(void)
392 for (i = 0; i < 256; ++i) {
394 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
397 if (test_facility(28)) /* TOD-clock steering */
398 ptff(kvm_s390_available_subfunc.ptff,
399 sizeof(kvm_s390_available_subfunc.ptff),
402 if (test_facility(17)) { /* MSA */
403 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
404 kvm_s390_available_subfunc.kmac);
405 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
406 kvm_s390_available_subfunc.kmc);
407 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
408 kvm_s390_available_subfunc.km);
409 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
410 kvm_s390_available_subfunc.kimd);
411 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
412 kvm_s390_available_subfunc.klmd);
414 if (test_facility(76)) /* MSA3 */
415 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
416 kvm_s390_available_subfunc.pckmo);
417 if (test_facility(77)) { /* MSA4 */
418 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
419 kvm_s390_available_subfunc.kmctr);
420 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
421 kvm_s390_available_subfunc.kmf);
422 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
423 kvm_s390_available_subfunc.kmo);
424 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
425 kvm_s390_available_subfunc.pcc);
427 if (test_facility(57)) /* MSA5 */
428 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
429 kvm_s390_available_subfunc.ppno);
431 if (test_facility(146)) /* MSA8 */
432 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
433 kvm_s390_available_subfunc.kma);
435 if (test_facility(155)) /* MSA9 */
436 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
437 kvm_s390_available_subfunc.kdsa);
439 if (test_facility(150)) /* SORTL */
440 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
442 if (test_facility(151)) /* DFLTCC */
443 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
445 if (MACHINE_HAS_ESOP)
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
448 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
449 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
451 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
452 !test_facility(3) || !nested)
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
455 if (sclp.has_64bscao)
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
464 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
466 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
468 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
470 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
472 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
473 * all skey handling functions read/set the skey from the PGSTE
474 * instead of the real storage key.
476 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
477 * pages being detected as preserved although they are resident.
479 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
480 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
482 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
483 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
484 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
486 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
487 * cannot easily shadow the SCA because of the ipte lock.
491 int kvm_arch_init(void *opaque)
495 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
499 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
500 if (!kvm_s390_dbf_uv)
503 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
504 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
507 kvm_s390_cpu_feat_init();
509 /* Register floating interrupt controller interface. */
510 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
512 pr_err("A FLIC registration call failed with rc=%d\n", rc);
516 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
517 rc = kvm_s390_pci_init();
519 pr_err("Unable to allocate AIFT for PCI\n");
524 rc = kvm_s390_gib_init(GAL_ISC);
535 void kvm_arch_exit(void)
537 kvm_s390_gib_destroy();
538 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
540 debug_unregister(kvm_s390_dbf);
541 debug_unregister(kvm_s390_dbf_uv);
544 /* Section: device related */
545 long kvm_arch_dev_ioctl(struct file *filp,
546 unsigned int ioctl, unsigned long arg)
548 if (ioctl == KVM_S390_ENABLE_SIE)
549 return s390_enable_sie();
553 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
558 case KVM_CAP_S390_PSW:
559 case KVM_CAP_S390_GMAP:
560 case KVM_CAP_SYNC_MMU:
561 #ifdef CONFIG_KVM_S390_UCONTROL
562 case KVM_CAP_S390_UCONTROL:
564 case KVM_CAP_ASYNC_PF:
565 case KVM_CAP_SYNC_REGS:
566 case KVM_CAP_ONE_REG:
567 case KVM_CAP_ENABLE_CAP:
568 case KVM_CAP_S390_CSS_SUPPORT:
569 case KVM_CAP_IOEVENTFD:
570 case KVM_CAP_DEVICE_CTRL:
571 case KVM_CAP_S390_IRQCHIP:
572 case KVM_CAP_VM_ATTRIBUTES:
573 case KVM_CAP_MP_STATE:
574 case KVM_CAP_IMMEDIATE_EXIT:
575 case KVM_CAP_S390_INJECT_IRQ:
576 case KVM_CAP_S390_USER_SIGP:
577 case KVM_CAP_S390_USER_STSI:
578 case KVM_CAP_S390_SKEYS:
579 case KVM_CAP_S390_IRQ_STATE:
580 case KVM_CAP_S390_USER_INSTR0:
581 case KVM_CAP_S390_CMMA_MIGRATION:
582 case KVM_CAP_S390_AIS:
583 case KVM_CAP_S390_AIS_MIGRATION:
584 case KVM_CAP_S390_VCPU_RESETS:
585 case KVM_CAP_SET_GUEST_DEBUG:
586 case KVM_CAP_S390_DIAG318:
587 case KVM_CAP_S390_MEM_OP_EXTENSION:
590 case KVM_CAP_SET_GUEST_DEBUG2:
591 r = KVM_GUESTDBG_VALID_MASK;
593 case KVM_CAP_S390_HPAGE_1M:
595 if (hpage && !kvm_is_ucontrol(kvm))
598 case KVM_CAP_S390_MEM_OP:
601 case KVM_CAP_NR_VCPUS:
602 case KVM_CAP_MAX_VCPUS:
603 case KVM_CAP_MAX_VCPU_ID:
604 r = KVM_S390_BSCA_CPU_SLOTS;
605 if (!kvm_s390_use_sca_entries())
607 else if (sclp.has_esca && sclp.has_64bscao)
608 r = KVM_S390_ESCA_CPU_SLOTS;
609 if (ext == KVM_CAP_NR_VCPUS)
610 r = min_t(unsigned int, num_online_cpus(), r);
612 case KVM_CAP_S390_COW:
613 r = MACHINE_HAS_ESOP;
615 case KVM_CAP_S390_VECTOR_REGISTERS:
618 case KVM_CAP_S390_RI:
619 r = test_facility(64);
621 case KVM_CAP_S390_GS:
622 r = test_facility(133);
624 case KVM_CAP_S390_BPB:
625 r = test_facility(82);
627 case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
628 r = async_destroy && is_prot_virt_host();
630 case KVM_CAP_S390_PROTECTED:
631 r = is_prot_virt_host();
633 case KVM_CAP_S390_PROTECTED_DUMP: {
634 u64 pv_cmds_dump[] = {
635 BIT_UVC_CMD_DUMP_INIT,
636 BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
637 BIT_UVC_CMD_DUMP_CPU,
638 BIT_UVC_CMD_DUMP_COMPLETE,
642 r = is_prot_virt_host();
644 for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
645 if (!test_bit_inv(pv_cmds_dump[i],
646 (unsigned long *)&uv_info.inst_calls_list)) {
653 case KVM_CAP_S390_ZPCI_OP:
654 r = kvm_s390_pci_interp_allowed();
656 case KVM_CAP_S390_CPU_TOPOLOGY:
657 r = test_facility(11);
665 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
668 gfn_t cur_gfn, last_gfn;
669 unsigned long gaddr, vmaddr;
670 struct gmap *gmap = kvm->arch.gmap;
671 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
673 /* Loop over all guest segments */
674 cur_gfn = memslot->base_gfn;
675 last_gfn = memslot->base_gfn + memslot->npages;
676 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
677 gaddr = gfn_to_gpa(cur_gfn);
678 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
679 if (kvm_is_error_hva(vmaddr))
682 bitmap_zero(bitmap, _PAGE_ENTRIES);
683 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
684 for (i = 0; i < _PAGE_ENTRIES; i++) {
685 if (test_bit(i, bitmap))
686 mark_page_dirty(kvm, cur_gfn + i);
689 if (fatal_signal_pending(current))
695 /* Section: vm related */
696 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
699 * Get (and clear) the dirty memory log for a memory slot.
701 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
702 struct kvm_dirty_log *log)
706 struct kvm_memory_slot *memslot;
709 if (kvm_is_ucontrol(kvm))
712 mutex_lock(&kvm->slots_lock);
715 if (log->slot >= KVM_USER_MEM_SLOTS)
718 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
722 /* Clear the dirty log */
724 n = kvm_dirty_bitmap_bytes(memslot);
725 memset(memslot->dirty_bitmap, 0, n);
729 mutex_unlock(&kvm->slots_lock);
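/*
 * Illustrative userspace sketch (not part of this file; "vm_fd" and
 * "bitmap" are placeholders, <sys/ioctl.h> and <linux/kvm.h> are needed).
 * The dirty log for a slot is fetched with the generic VM ioctl, one bit
 * per page of the memslot:
 *
 *	struct kvm_dirty_log log = { .slot = 0 };
 *	log.dirty_bitmap = bitmap;
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *		err(1, "KVM_GET_DIRTY_LOG");
 */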
733 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
736 struct kvm_vcpu *vcpu;
738 kvm_for_each_vcpu(i, vcpu, kvm) {
739 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
743 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
751 case KVM_CAP_S390_IRQCHIP:
752 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
753 kvm->arch.use_irqchip = 1;
756 case KVM_CAP_S390_USER_SIGP:
757 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
758 kvm->arch.user_sigp = 1;
761 case KVM_CAP_S390_VECTOR_REGISTERS:
762 mutex_lock(&kvm->lock);
763 if (kvm->created_vcpus) {
765 } else if (MACHINE_HAS_VX) {
766 set_kvm_facility(kvm->arch.model.fac_mask, 129);
767 set_kvm_facility(kvm->arch.model.fac_list, 129);
768 if (test_facility(134)) {
769 set_kvm_facility(kvm->arch.model.fac_mask, 134);
770 set_kvm_facility(kvm->arch.model.fac_list, 134);
772 if (test_facility(135)) {
773 set_kvm_facility(kvm->arch.model.fac_mask, 135);
774 set_kvm_facility(kvm->arch.model.fac_list, 135);
776 if (test_facility(148)) {
777 set_kvm_facility(kvm->arch.model.fac_mask, 148);
778 set_kvm_facility(kvm->arch.model.fac_list, 148);
780 if (test_facility(152)) {
781 set_kvm_facility(kvm->arch.model.fac_mask, 152);
782 set_kvm_facility(kvm->arch.model.fac_list, 152);
784 if (test_facility(192)) {
785 set_kvm_facility(kvm->arch.model.fac_mask, 192);
786 set_kvm_facility(kvm->arch.model.fac_list, 192);
791 mutex_unlock(&kvm->lock);
792 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
793 r ? "(not available)" : "(success)");
795 case KVM_CAP_S390_RI:
797 mutex_lock(&kvm->lock);
798 if (kvm->created_vcpus) {
800 } else if (test_facility(64)) {
801 set_kvm_facility(kvm->arch.model.fac_mask, 64);
802 set_kvm_facility(kvm->arch.model.fac_list, 64);
805 mutex_unlock(&kvm->lock);
806 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
807 r ? "(not available)" : "(success)");
809 case KVM_CAP_S390_AIS:
810 mutex_lock(&kvm->lock);
811 if (kvm->created_vcpus) {
814 set_kvm_facility(kvm->arch.model.fac_mask, 72);
815 set_kvm_facility(kvm->arch.model.fac_list, 72);
818 mutex_unlock(&kvm->lock);
819 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
820 r ? "(not available)" : "(success)");
822 case KVM_CAP_S390_GS:
824 mutex_lock(&kvm->lock);
825 if (kvm->created_vcpus) {
827 } else if (test_facility(133)) {
828 set_kvm_facility(kvm->arch.model.fac_mask, 133);
829 set_kvm_facility(kvm->arch.model.fac_list, 133);
832 mutex_unlock(&kvm->lock);
833 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
834 r ? "(not available)" : "(success)");
836 case KVM_CAP_S390_HPAGE_1M:
837 mutex_lock(&kvm->lock);
838 if (kvm->created_vcpus)
840 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
844 mmap_write_lock(kvm->mm);
845 kvm->mm->context.allow_gmap_hpage_1m = 1;
846 mmap_write_unlock(kvm->mm);
848 * We might have to create fake 4k page
849 * tables. To avoid that the hardware works on
850 * stale PGSTEs, we emulate these instructions.
852 kvm->arch.use_skf = 0;
853 kvm->arch.use_pfmfi = 0;
855 mutex_unlock(&kvm->lock);
856 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
857 r ? "(not available)" : "(success)");
859 case KVM_CAP_S390_USER_STSI:
860 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
861 kvm->arch.user_stsi = 1;
864 case KVM_CAP_S390_USER_INSTR0:
865 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
866 kvm->arch.user_instr0 = 1;
867 icpt_operexc_on_all_vcpus(kvm);
870 case KVM_CAP_S390_CPU_TOPOLOGY:
872 mutex_lock(&kvm->lock);
873 if (kvm->created_vcpus) {
875 } else if (test_facility(11)) {
876 set_kvm_facility(kvm->arch.model.fac_mask, 11);
877 set_kvm_facility(kvm->arch.model.fac_list, 11);
880 mutex_unlock(&kvm->lock);
881 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
882 r ? "(not available)" : "(success)");
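/*
 * Illustrative userspace sketch (not part of this file; "vm_fd" is a
 * placeholder). The capabilities handled above are switched on with the
 * plain VM KVM_ENABLE_CAP ioctl; those that check kvm->created_vcpus must
 * be enabled before the first vCPU is created:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */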
891 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
895 switch (attr->attr) {
896 case KVM_S390_VM_MEM_LIMIT_SIZE:
898 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
899 kvm->arch.mem_limit);
900 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
910 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
914 switch (attr->attr) {
915 case KVM_S390_VM_MEM_ENABLE_CMMA:
920 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
921 mutex_lock(&kvm->lock);
922 if (kvm->created_vcpus)
924 else if (kvm->mm->context.allow_gmap_hpage_1m)
927 kvm->arch.use_cmma = 1;
928 /* Not compatible with cmma. */
929 kvm->arch.use_pfmfi = 0;
932 mutex_unlock(&kvm->lock);
934 case KVM_S390_VM_MEM_CLR_CMMA:
939 if (!kvm->arch.use_cmma)
942 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
943 mutex_lock(&kvm->lock);
944 idx = srcu_read_lock(&kvm->srcu);
945 s390_reset_cmma(kvm->arch.gmap->mm);
946 srcu_read_unlock(&kvm->srcu, idx);
947 mutex_unlock(&kvm->lock);
950 case KVM_S390_VM_MEM_LIMIT_SIZE: {
951 unsigned long new_limit;
953 if (kvm_is_ucontrol(kvm))
956 if (get_user(new_limit, (u64 __user *)attr->addr))
959 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
960 new_limit > kvm->arch.mem_limit)
966 /* gmap_create takes last usable address */
967 if (new_limit != KVM_S390_NO_MEM_LIMIT)
971 mutex_lock(&kvm->lock);
972 if (!kvm->created_vcpus) {
973 /* gmap_create will round the limit up */
974 struct gmap *new = gmap_create(current->mm, new_limit);
979 gmap_remove(kvm->arch.gmap);
981 kvm->arch.gmap = new;
985 mutex_unlock(&kvm->lock);
986 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
987 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
988 (void *) kvm->arch.gmap->asce);
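/*
 * Illustrative userspace sketch (not part of this file; "vm_fd" and the
 * 16G value are placeholders). The limit handled above is a u64 passed by
 * pointer through the VM attribute interface:
 *
 *	__u64 limit = 16ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */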
998 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
1000 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1002 struct kvm_vcpu *vcpu;
1005 kvm_s390_vcpu_block_all(kvm);
1007 kvm_for_each_vcpu(i, vcpu, kvm) {
1008 kvm_s390_vcpu_crypto_setup(vcpu);
1009 /* recreate the shadow crycb by leaving the VSIE handler */
1010 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1013 kvm_s390_vcpu_unblock_all(kvm);
1016 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1018 mutex_lock(&kvm->lock);
1019 switch (attr->attr) {
1020 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1021 if (!test_kvm_facility(kvm, 76)) {
1022 mutex_unlock(&kvm->lock);
1026 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1027 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1028 kvm->arch.crypto.aes_kw = 1;
1029 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1031 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1032 if (!test_kvm_facility(kvm, 76)) {
1033 mutex_unlock(&kvm->lock);
1037 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1038 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1039 kvm->arch.crypto.dea_kw = 1;
1040 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1042 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1043 if (!test_kvm_facility(kvm, 76)) {
1044 mutex_unlock(&kvm->lock);
1047 kvm->arch.crypto.aes_kw = 0;
1048 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1049 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1050 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1052 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1053 if (!test_kvm_facility(kvm, 76)) {
1054 mutex_unlock(&kvm->lock);
1057 kvm->arch.crypto.dea_kw = 0;
1058 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1059 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1060 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1062 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1063 if (!ap_instructions_available()) {
1064 mutex_unlock(&kvm->lock);
1067 kvm->arch.crypto.apie = 1;
1069 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1070 if (!ap_instructions_available()) {
1071 mutex_unlock(&kvm->lock);
1074 kvm->arch.crypto.apie = 0;
1077 mutex_unlock(&kvm->lock);
1081 kvm_s390_vcpu_crypto_reset_all(kvm);
1082 mutex_unlock(&kvm->lock);
1086 static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1088 /* Only set the ECB bits after guest requests zPCI interpretation */
1089 if (!vcpu->kvm->arch.use_zpci_interp)
1092 vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1093 vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1096 void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1098 struct kvm_vcpu *vcpu;
1101 lockdep_assert_held(&kvm->lock);
1103 if (!kvm_s390_pci_interp_allowed())
1107 * If host is configured for PCI and the necessary facilities are
1108 * available, turn on interpretation for the life of this guest
1110 kvm->arch.use_zpci_interp = 1;
1112 kvm_s390_vcpu_block_all(kvm);
1114 kvm_for_each_vcpu(i, vcpu, kvm) {
1115 kvm_s390_vcpu_pci_setup(vcpu);
1116 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1119 kvm_s390_vcpu_unblock_all(kvm);
1122 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1125 struct kvm_vcpu *vcpu;
1127 kvm_for_each_vcpu(cx, vcpu, kvm)
1128 kvm_s390_sync_request(req, vcpu);
1132 * Must be called with kvm->srcu held to avoid races on memslots, and with
1133 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1135 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1137 struct kvm_memory_slot *ms;
1138 struct kvm_memslots *slots;
1139 unsigned long ram_pages = 0;
1142 /* migration mode already enabled */
1143 if (kvm->arch.migration_mode)
1145 slots = kvm_memslots(kvm);
1146 if (!slots || kvm_memslots_empty(slots))
1149 if (!kvm->arch.use_cmma) {
1150 kvm->arch.migration_mode = 1;
1153 /* mark all the pages in active slots as dirty */
1154 kvm_for_each_memslot(ms, bkt, slots) {
1155 if (!ms->dirty_bitmap)
1158 * The second half of the bitmap is only used on x86,
1159 * and would be wasted otherwise, so we put it to good
1160 * use here to keep track of the state of the storage
1163 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1164 ram_pages += ms->npages;
1166 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1167 kvm->arch.migration_mode = 1;
1168 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
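/*
 * Note: kvm_second_dirty_bitmap() above is the otherwise unused (non-x86)
 * second half of the slot's dirty bitmap. Setting it to all ones marks
 * every page as "CMMA state not yet reported"; kvm_s390_get_cmma() clears
 * the bits again as it hands the values to userspace and decrements
 * cmma_dirty_pages accordingly.
 */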
1173 * Must be called with kvm->slots_lock to avoid races with ourselves and
1174 * kvm_s390_vm_start_migration.
1176 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1178 /* migration mode already disabled */
1179 if (!kvm->arch.migration_mode)
1181 kvm->arch.migration_mode = 0;
1182 if (kvm->arch.use_cmma)
1183 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1187 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1188 struct kvm_device_attr *attr)
1192 mutex_lock(&kvm->slots_lock);
1193 switch (attr->attr) {
1194 case KVM_S390_VM_MIGRATION_START:
1195 res = kvm_s390_vm_start_migration(kvm);
1197 case KVM_S390_VM_MIGRATION_STOP:
1198 res = kvm_s390_vm_stop_migration(kvm);
1203 mutex_unlock(&kvm->slots_lock);
1208 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1209 struct kvm_device_attr *attr)
1211 u64 mig = kvm->arch.migration_mode;
1213 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1216 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1221 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1223 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1225 struct kvm_s390_vm_tod_clock gtod;
1227 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1230 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1232 __kvm_s390_set_tod_clock(kvm, &gtod);
1234 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1235 gtod.epoch_idx, gtod.tod);
1240 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1244 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1250 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1255 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1257 struct kvm_s390_vm_tod_clock gtod = { 0 };
1259 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1263 __kvm_s390_set_tod_clock(kvm, &gtod);
1264 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1268 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1275 mutex_lock(&kvm->lock);
1277 * For protected guests, the TOD is managed by the ultravisor, so trying
1278 * to change it will never bring the expected results.
1280 if (kvm_s390_pv_is_protected(kvm)) {
1285 switch (attr->attr) {
1286 case KVM_S390_VM_TOD_EXT:
1287 ret = kvm_s390_set_tod_ext(kvm, attr);
1289 case KVM_S390_VM_TOD_HIGH:
1290 ret = kvm_s390_set_tod_high(kvm, attr);
1292 case KVM_S390_VM_TOD_LOW:
1293 ret = kvm_s390_set_tod_low(kvm, attr);
1301 mutex_unlock(&kvm->lock);
1305 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1306 struct kvm_s390_vm_tod_clock *gtod)
1308 union tod_clock clk;
1312 store_tod_clock_ext(&clk);
1314 gtod->tod = clk.tod + kvm->arch.epoch;
1315 gtod->epoch_idx = 0;
1316 if (test_kvm_facility(kvm, 139)) {
1317 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1318 if (gtod->tod < clk.tod)
1319 gtod->epoch_idx += 1;
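/*
 * The addition of the epoch above is modulo 2^64: if the sum wrapped
 * (gtod->tod ended up smaller than clk.tod), a carry out of the 64-bit
 * TOD occurred and is propagated into the epoch index here.
 */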
1325 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1327 struct kvm_s390_vm_tod_clock gtod;
1329 memset(&gtod, 0, sizeof(gtod));
1330 kvm_s390_get_tod_clock(kvm, &gtod);
1331 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1334 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1335 gtod.epoch_idx, gtod.tod);
1339 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1343 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1346 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1351 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1355 gtod = kvm_s390_get_tod_clock_fast(kvm);
1356 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1358 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1363 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1370 switch (attr->attr) {
1371 case KVM_S390_VM_TOD_EXT:
1372 ret = kvm_s390_get_tod_ext(kvm, attr);
1374 case KVM_S390_VM_TOD_HIGH:
1375 ret = kvm_s390_get_tod_high(kvm, attr);
1377 case KVM_S390_VM_TOD_LOW:
1378 ret = kvm_s390_get_tod_low(kvm, attr);
1387 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1389 struct kvm_s390_vm_cpu_processor *proc;
1390 u16 lowest_ibc, unblocked_ibc;
1393 mutex_lock(&kvm->lock);
1394 if (kvm->created_vcpus) {
1398 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1403 if (!copy_from_user(proc, (void __user *)attr->addr,
1405 kvm->arch.model.cpuid = proc->cpuid;
1406 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1407 unblocked_ibc = sclp.ibc & 0xfff;
1408 if (lowest_ibc && proc->ibc) {
1409 if (proc->ibc > unblocked_ibc)
1410 kvm->arch.model.ibc = unblocked_ibc;
1411 else if (proc->ibc < lowest_ibc)
1412 kvm->arch.model.ibc = lowest_ibc;
1414 kvm->arch.model.ibc = proc->ibc;
1416 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1417 S390_ARCH_FAC_LIST_SIZE_BYTE);
1418 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419 kvm->arch.model.ibc,
1420 kvm->arch.model.cpuid);
1421 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422 kvm->arch.model.fac_list[0],
1423 kvm->arch.model.fac_list[1],
1424 kvm->arch.model.fac_list[2]);
1429 mutex_unlock(&kvm->lock);
1433 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1434 struct kvm_device_attr *attr)
1436 struct kvm_s390_vm_cpu_feat data;
1438 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1440 if (!bitmap_subset((unsigned long *) data.feat,
1441 kvm_s390_available_cpu_feat,
1442 KVM_S390_VM_CPU_FEAT_NR_BITS))
1445 mutex_lock(&kvm->lock);
1446 if (kvm->created_vcpus) {
1447 mutex_unlock(&kvm->lock);
1450 bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1451 mutex_unlock(&kvm->lock);
1452 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1459 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1460 struct kvm_device_attr *attr)
1462 mutex_lock(&kvm->lock);
1463 if (kvm->created_vcpus) {
1464 mutex_unlock(&kvm->lock);
1468 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1469 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1470 mutex_unlock(&kvm->lock);
1473 mutex_unlock(&kvm->lock);
1475 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1476 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1477 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1478 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1479 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1480 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1481 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1482 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1483 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1484 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1485 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1486 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1487 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1488 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1489 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1490 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1491 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1492 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1493 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1494 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1495 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1496 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1497 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1498 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1499 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1500 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1501 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1502 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1503 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1504 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1505 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1506 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1507 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1508 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1509 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1510 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1511 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1512 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1513 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1514 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1515 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1516 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1517 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1518 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1519 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1520 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1521 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1522 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1523 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1524 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1525 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1526 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1527 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1528 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1529 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1530 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1531 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1536 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1540 switch (attr->attr) {
1541 case KVM_S390_VM_CPU_PROCESSOR:
1542 ret = kvm_s390_set_processor(kvm, attr);
1544 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1545 ret = kvm_s390_set_processor_feat(kvm, attr);
1547 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1548 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1554 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1556 struct kvm_s390_vm_cpu_processor *proc;
1559 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1564 proc->cpuid = kvm->arch.model.cpuid;
1565 proc->ibc = kvm->arch.model.ibc;
1566 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1567 S390_ARCH_FAC_LIST_SIZE_BYTE);
1568 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1569 kvm->arch.model.ibc,
1570 kvm->arch.model.cpuid);
1571 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1572 kvm->arch.model.fac_list[0],
1573 kvm->arch.model.fac_list[1],
1574 kvm->arch.model.fac_list[2]);
1575 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1582 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1584 struct kvm_s390_vm_cpu_machine *mach;
1587 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1592 get_cpu_id((struct cpuid *) &mach->cpuid);
1593 mach->ibc = sclp.ibc;
1594 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1595 S390_ARCH_FAC_LIST_SIZE_BYTE);
1596 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1597 sizeof(stfle_fac_list));
1598 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1599 kvm->arch.model.ibc,
1600 kvm->arch.model.cpuid);
1601 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1605 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1609 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1616 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1617 struct kvm_device_attr *attr)
1619 struct kvm_s390_vm_cpu_feat data;
1621 bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1622 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1624 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1631 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1632 struct kvm_device_attr *attr)
1634 struct kvm_s390_vm_cpu_feat data;
1636 bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1637 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1639 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1646 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1647 struct kvm_device_attr *attr)
1649 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1650 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1653 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1654 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1655 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1656 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1657 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1658 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1659 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1660 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1661 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1662 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1663 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1664 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1665 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1666 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1667 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1668 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1669 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1670 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1671 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1672 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1673 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1674 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1675 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1676 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1677 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1678 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1679 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1680 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1681 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1682 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1683 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1684 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1685 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1686 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1687 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1688 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1689 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1690 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1691 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1692 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1693 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1694 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1695 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1696 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1697 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1698 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1699 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1700 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1701 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1702 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1703 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1704 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1705 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1706 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1707 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1708 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1709 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1714 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1715 struct kvm_device_attr *attr)
1717 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1718 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1721 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1722 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1723 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1724 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1725 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1726 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1727 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1728 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1729 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1730 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1731 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1732 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1733 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1734 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1735 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1736 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1737 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1738 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1739 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1740 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1741 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1742 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1743 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1744 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1745 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1746 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1747 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1748 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1749 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1750 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1751 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1752 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1753 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1754 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1755 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1756 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1757 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1758 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1759 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1760 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1761 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1762 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1763 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1764 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1765 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1766 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1767 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1768 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1769 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1770 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1771 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1772 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1773 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1774 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1775 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1776 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1777 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
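/*
 * Illustrative userspace sketch (not part of this file; "vm_fd" is a
 * placeholder). The host model and mask exported by the GET handlers above
 * are read through the same attribute interface, e.g. for the machine data:
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)&mach,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */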
1782 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1786 switch (attr->attr) {
1787 case KVM_S390_VM_CPU_PROCESSOR:
1788 ret = kvm_s390_get_processor(kvm, attr);
1790 case KVM_S390_VM_CPU_MACHINE:
1791 ret = kvm_s390_get_machine(kvm, attr);
1793 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1794 ret = kvm_s390_get_processor_feat(kvm, attr);
1796 case KVM_S390_VM_CPU_MACHINE_FEAT:
1797 ret = kvm_s390_get_machine_feat(kvm, attr);
1799 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1800 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1802 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1803 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1810 * kvm_s390_update_topology_change_report - update CPU topology change report
1811 * @kvm: guest KVM description
1812 * @val: set or clear the MTCR bit
1814 * Updates the Multiprocessor Topology-Change-Report bit to signal
1815 * the guest with a topology change.
1816 * This is only relevant if the topology facility is present.
1818 * The SCA version, bsca or esca, doesn't matter as offset is the same.
1820 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1822 union sca_utility new, old;
1823 struct bsca_block *sca;
1825 read_lock(&kvm->arch.sca_lock);
1826 sca = kvm->arch.sca;
1828 old = READ_ONCE(sca->utility);
1831 } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1832 read_unlock(&kvm->arch.sca_lock);
1835 static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1836 struct kvm_device_attr *attr)
1838 if (!test_kvm_facility(kvm, 11))
1841 kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1845 static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1846 struct kvm_device_attr *attr)
1850 if (!test_kvm_facility(kvm, 11))
1853 read_lock(&kvm->arch.sca_lock);
1854 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1855 read_unlock(&kvm->arch.sca_lock);
1857 return put_user(topo, (u8 __user *)attr->addr);
1860 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1864 switch (attr->group) {
1865 case KVM_S390_VM_MEM_CTRL:
1866 ret = kvm_s390_set_mem_control(kvm, attr);
1868 case KVM_S390_VM_TOD:
1869 ret = kvm_s390_set_tod(kvm, attr);
1871 case KVM_S390_VM_CPU_MODEL:
1872 ret = kvm_s390_set_cpu_model(kvm, attr);
1874 case KVM_S390_VM_CRYPTO:
1875 ret = kvm_s390_vm_set_crypto(kvm, attr);
1877 case KVM_S390_VM_MIGRATION:
1878 ret = kvm_s390_vm_set_migration(kvm, attr);
1880 case KVM_S390_VM_CPU_TOPOLOGY:
1881 ret = kvm_s390_set_topo_change_indication(kvm, attr);
1891 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1895 switch (attr->group) {
1896 case KVM_S390_VM_MEM_CTRL:
1897 ret = kvm_s390_get_mem_control(kvm, attr);
1899 case KVM_S390_VM_TOD:
1900 ret = kvm_s390_get_tod(kvm, attr);
1902 case KVM_S390_VM_CPU_MODEL:
1903 ret = kvm_s390_get_cpu_model(kvm, attr);
1905 case KVM_S390_VM_MIGRATION:
1906 ret = kvm_s390_vm_get_migration(kvm, attr);
1908 case KVM_S390_VM_CPU_TOPOLOGY:
1909 ret = kvm_s390_get_topo_change_indication(kvm, attr);
1919 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1923 switch (attr->group) {
1924 case KVM_S390_VM_MEM_CTRL:
1925 switch (attr->attr) {
1926 case KVM_S390_VM_MEM_ENABLE_CMMA:
1927 case KVM_S390_VM_MEM_CLR_CMMA:
1928 ret = sclp.has_cmma ? 0 : -ENXIO;
1930 case KVM_S390_VM_MEM_LIMIT_SIZE:
1938 case KVM_S390_VM_TOD:
1939 switch (attr->attr) {
1940 case KVM_S390_VM_TOD_LOW:
1941 case KVM_S390_VM_TOD_HIGH:
1949 case KVM_S390_VM_CPU_MODEL:
1950 switch (attr->attr) {
1951 case KVM_S390_VM_CPU_PROCESSOR:
1952 case KVM_S390_VM_CPU_MACHINE:
1953 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1954 case KVM_S390_VM_CPU_MACHINE_FEAT:
1955 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1956 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1964 case KVM_S390_VM_CRYPTO:
1965 switch (attr->attr) {
1966 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1967 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1968 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1969 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1972 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1973 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1974 ret = ap_instructions_available() ? 0 : -ENXIO;
1981 case KVM_S390_VM_MIGRATION:
1984 case KVM_S390_VM_CPU_TOPOLOGY:
1985 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1995 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1999 int srcu_idx, i, r = 0;
2001 if (args->flags != 0)
2004 /* Is this guest using storage keys? */
2005 if (!mm_uses_skeys(current->mm))
2006 return KVM_S390_GET_SKEYS_NONE;
2008 /* Enforce sane limit on memory allocation */
2009 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2012 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2016 mmap_read_lock(current->mm);
2017 srcu_idx = srcu_read_lock(&kvm->srcu);
2018 for (i = 0; i < args->count; i++) {
2019 hva = gfn_to_hva(kvm, args->start_gfn + i);
2020 if (kvm_is_error_hva(hva)) {
2025 r = get_guest_storage_key(current->mm, hva, &keys[i]);
2029 srcu_read_unlock(&kvm->srcu, srcu_idx);
2030 mmap_read_unlock(current->mm);
2033 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2034 sizeof(uint8_t) * args->count);
2043 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2047 int srcu_idx, i, r = 0;
2050 if (args->flags != 0)
2053 /* Enforce sane limit on memory allocation */
2054 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2057 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2061 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2062 sizeof(uint8_t) * args->count);
2068 /* Enable storage key handling for the guest */
2069 r = s390_enable_skey();
2074 mmap_read_lock(current->mm);
2075 srcu_idx = srcu_read_lock(&kvm->srcu);
2076 while (i < args->count) {
2078 hva = gfn_to_hva(kvm, args->start_gfn + i);
2079 if (kvm_is_error_hva(hva)) {
2084 /* Lowest order bit is reserved */
2085 if (keys[i] & 0x01) {
2090 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2092 r = fixup_user_fault(current->mm, hva,
2093 FAULT_FLAG_WRITE, &unlocked);
2100 srcu_read_unlock(&kvm->srcu, srcu_idx);
2101 mmap_read_unlock(current->mm);
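/*
 * Storage key transfer as seen from user space, e.g. during migration
 * (illustrative sketch only; the field names follow the kvm_s390_skeys
 * UAPI layout as assumed here):
 *
 *	__u8 keys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 128,
 *		.skeydata_addr = (__u64)(unsigned long)keys,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * KVM_S390_GET_SKEYS_NONE tells the caller that the guest never enabled
 * storage keys, so there is nothing to save. On the set path the lowest
 * order bit of every key byte is reserved and must be zero, as enforced
 * above.
 */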
2108 * Base address and length must be sent at the start of each block, therefore
2109 * it's cheaper to send some clean data, as long as it's less than the size of two longs.
2112 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2113 /* for consistency */
2114 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2116 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2117 u8 *res, unsigned long bufsize)
2119 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2122 while (args->count < bufsize) {
2123 hva = gfn_to_hva(kvm, cur_gfn);
2125 * We return an error if the first value was invalid, but we
2126 * return successfully if at least one value was copied.
2128 if (kvm_is_error_hva(hva))
2129 return args->count ? 0 : -EFAULT;
2130 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2132 res[args->count++] = (pgstev >> 24) & 0x43;
2139 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2142 return ____gfn_to_memslot(slots, gfn, true);
2145 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2146 unsigned long cur_gfn)
2148 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2149 unsigned long ofs = cur_gfn - ms->base_gfn;
2150 struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2152 if (ms->base_gfn + ms->npages <= cur_gfn) {
2153 mnode = rb_next(mnode);
2154 /* If we are above the highest slot, wrap around */
2156 mnode = rb_first(&slots->gfn_tree);
2158 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2161 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2162 while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2163 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2164 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2166 return ms->base_gfn + ofs;
2169 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2170 u8 *res, unsigned long bufsize)
2172 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2173 struct kvm_memslots *slots = kvm_memslots(kvm);
2174 struct kvm_memory_slot *ms;
2176 if (unlikely(kvm_memslots_empty(slots)))
2179 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2180 ms = gfn_to_memslot(kvm, cur_gfn);
2182 args->start_gfn = cur_gfn;
2185 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2186 mem_end = kvm_s390_get_gfn_end(slots);
2188 while (args->count < bufsize) {
2189 hva = gfn_to_hva(kvm, cur_gfn);
2190 if (kvm_is_error_hva(hva))
2192 /* Decrement only if we actually flipped the bit to 0 */
2193 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2194 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2195 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2197 /* Save the value */
2198 res[args->count++] = (pgstev >> 24) & 0x43;
2199 /* If the next bit is too far away, stop. */
2200 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2202 /* If we reached the previous "next", find the next one */
2203 if (cur_gfn == next_gfn)
2204 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2205 /* Reached the end of memory or of the buffer, stop */
2206 if ((next_gfn >= mem_end) ||
2207 (next_gfn - args->start_gfn >= bufsize))
2210 /* Reached the end of the current memslot, take the next one. */
2211 if (cur_gfn - ms->base_gfn >= ms->npages) {
2212 ms = gfn_to_memslot(kvm, cur_gfn);
2221 * This function searches for the next page with dirty CMMA attributes, and
2222 * saves the attributes in the buffer up to either the end of the buffer or
2223 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2224 * no trailing clean bytes are saved.
2225 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2226 * output buffer will indicate 0 as length.
2228 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2229 struct kvm_s390_cmma_log *args)
2231 unsigned long bufsize;
2232 int srcu_idx, peek, ret;
2235 if (!kvm->arch.use_cmma)
2237 /* Invalid/unsupported flags were specified */
2238 if (args->flags & ~KVM_S390_CMMA_PEEK)
2240 /* Migration mode query, and we are not doing a migration */
2241 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2242 if (!peek && !kvm->arch.migration_mode)
2244 /* CMMA is disabled or was not used, or the buffer has length zero */
2245 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2246 if (!bufsize || !kvm->mm->context.uses_cmm) {
2247 memset(args, 0, sizeof(*args));
2250 /* We are not peeking, and there are no dirty pages */
2251 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2252 memset(args, 0, sizeof(*args));
2256 values = vmalloc(bufsize);
2260 mmap_read_lock(kvm->mm);
2261 srcu_idx = srcu_read_lock(&kvm->srcu);
2263 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2265 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2266 srcu_read_unlock(&kvm->srcu, srcu_idx);
2267 mmap_read_unlock(kvm->mm);
2269 if (kvm->arch.migration_mode)
2270 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2272 args->remaining = 0;
2274 if (copy_to_user((void __user *)args->values, values, args->count))
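/*
 * A user space migration loop would typically call KVM_S390_GET_CMMA_BITS
 * repeatedly, advancing start_gfn itself, until no dirty values remain
 * (illustrative sketch only; struct kvm_s390_cmma_log is the UAPI layout
 * assumed here):
 *
 *	int ret;
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buf_size,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *
 *	do {
 *		ret = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		... consume log.count values starting at frame log.start_gfn ...
 *		log.start_gfn += log.count;
 *		log.count = buf_size;
 *	} while (ret == 0 && log.remaining);
 */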
2282 * This function sets the CMMA attributes for the given pages. If the input
2283 * buffer has zero length, no action is taken, otherwise the attributes are
2284 * set and the mm->context.uses_cmm flag is set.
2286 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2287 const struct kvm_s390_cmma_log *args)
2289 unsigned long hva, mask, pgstev, i;
2291 int srcu_idx, r = 0;
2295 if (!kvm->arch.use_cmma)
2297 /* invalid/unsupported flags */
2298 if (args->flags != 0)
2300 /* Enforce sane limit on memory allocation */
2301 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2304 if (args->count == 0)
2307 bits = vmalloc(array_size(sizeof(*bits), args->count));
2311 r = copy_from_user(bits, (void __user *)args->values, args->count);
2317 mmap_read_lock(kvm->mm);
2318 srcu_idx = srcu_read_lock(&kvm->srcu);
2319 for (i = 0; i < args->count; i++) {
2320 hva = gfn_to_hva(kvm, args->start_gfn + i);
2321 if (kvm_is_error_hva(hva)) {
2327 pgstev = pgstev << 24;
2328 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2329 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2331 srcu_read_unlock(&kvm->srcu, srcu_idx);
2332 mmap_read_unlock(kvm->mm);
2334 if (!kvm->mm->context.uses_cmm) {
2335 mmap_write_lock(kvm->mm);
2336 kvm->mm->context.uses_cmm = 1;
2337 mmap_write_unlock(kvm->mm);
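/*
 * Note the symmetry with the get path above: the byte provided by user space
 * is shifted into the same PGSTE bit positions that kvm_s390_get_cmma()
 * reads with ">> 24", and the mask limits the update to the CMMA usage
 * state and the NODAT bit, so no unrelated PGSTE bits can be clobbered.
 */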
2345 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2347 * @kvm: the VM whose protected vCPUs are to be converted
2348 * @rc: return value for the RC field of the UVC (in case of error)
2349 * @rrc: return value for the RRC field of the UVC (in case of error)
2351 * Does not stop in case of error, tries to convert as many
2352 * CPUs as possible. In case of error, the RC and RRC of the last error are
2355 * Return: 0 in case of success, otherwise -EIO
2357 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2359 struct kvm_vcpu *vcpu;
2365 * We ignore failures and try to destroy as many CPUs as possible.
2366 * At the same time we must not free the assigned resources when
2367 * this fails, as the ultravisor still has access to that memory.
2368 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2370 * We want to return the first failure rc and rrc, though.
2372 kvm_for_each_vcpu(i, vcpu, kvm) {
2373 mutex_lock(&vcpu->mutex);
2374 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2379 mutex_unlock(&vcpu->mutex);
2381 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2383 kvm_s390_gisa_enable(kvm);
2388 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2390 * @kvm: the VM whose protected vCPUs are to be converted
2391 * @rc: return value for the RC field of the UVC (in case of error)
2392 * @rrc: return value for the RRC field of the UVC (in case of error)
2394 * Tries to undo the conversion in case of error.
2396 * Return: 0 in case of success, otherwise -EIO
2398 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2404 struct kvm_vcpu *vcpu;
2406 /* Disable the GISA if the ultravisor does not support AIV. */
2407 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2408 kvm_s390_gisa_disable(kvm);
2410 kvm_for_each_vcpu(i, vcpu, kvm) {
2411 mutex_lock(&vcpu->mutex);
2412 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2413 mutex_unlock(&vcpu->mutex);
2418 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2423 * Here we provide user space with a direct interface to query UV
2424 * related data like UV maxima and available features as well as
2425 * feature specific data.
2427 * To facilitate future extension of the data structures we'll try to
2428 * write data up to the maximum requested length.
2430 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2434 switch (info->header.id) {
2435 case KVM_PV_INFO_VM: {
2436 len_min = sizeof(info->header) + sizeof(info->vm);
2438 if (info->header.len_max < len_min)
2441 memcpy(info->vm.inst_calls_list,
2442 uv_info.inst_calls_list,
2443 sizeof(uv_info.inst_calls_list));
2445 /* It's max cpuid not max cpus, so it's off by one */
2446 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2447 info->vm.max_guests = uv_info.max_num_sec_conf;
2448 info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2449 info->vm.feature_indication = uv_info.uv_feature_indications;
2453 case KVM_PV_INFO_DUMP: {
2454 len_min = sizeof(info->header) + sizeof(info->dump);
2456 if (info->header.len_max < len_min)
2459 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2460 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2461 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
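/*
 * The len_max/len_min handshake above is what keeps this interface forward
 * compatible: user space states how much it can accept, the kernel writes at
 * most that much and reports the amount actually written in len_written.
 * Illustrative query from user space (sketch only; field names as assumed
 * from the UAPI):
 *
 *	struct kvm_s390_pv_info info = {
 *		.header.id = KVM_PV_INFO_VM,
 *		.header.len_max = sizeof(info),
 *	};
 *	struct kvm_pv_cmd cmd = {
 *		.cmd = KVM_PV_INFO,
 *		.data = (__u64)(unsigned long)&info,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 */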
2469 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2470 struct kvm_s390_pv_dmp dmp)
2473 void __user *result_buff = (void __user *)dmp.buff_addr;
2475 switch (dmp.subcmd) {
2476 case KVM_PV_DUMP_INIT: {
2477 if (kvm->arch.pv.dumping)
2481 * Block SIE entry as concurrent dump UVCs could lead
2484 kvm_s390_vcpu_block_all(kvm);
2486 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2487 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2488 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2491 kvm->arch.pv.dumping = true;
2493 kvm_s390_vcpu_unblock_all(kvm);
2498 case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2499 if (!kvm->arch.pv.dumping)
2503 * gaddr is an output parameter since we might stop
2504 * early. As dmp will be copied back in our caller, we
2505 * don't need to do it ourselves.
2507 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2508 &cmd->rc, &cmd->rrc);
2511 case KVM_PV_DUMP_COMPLETE: {
2512 if (!kvm->arch.pv.dumping)
2516 if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2519 r = kvm_s390_pv_dump_complete(kvm, result_buff,
2520 &cmd->rc, &cmd->rrc);
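/*
 * The dump subcommands above are expected in a fixed order: KVM_PV_DUMP_INIT
 * once, then KVM_PV_DUMP_CONFIG_STOR_STATE as often as needed to walk guest
 * storage, and finally KVM_PV_DUMP_COMPLETE with a buffer of at least
 * uv_info.conf_dump_finalize_len bytes. The kvm->arch.pv.dumping flag set
 * during KVM_PV_DUMP_INIT is what gates the two later subcommands.
 */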
2531 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2533 const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2534 void __user *argp = (void __user *)cmd->data;
2539 mutex_lock(&kvm->lock);
2542 case KVM_PV_ENABLE: {
2544 if (kvm_s390_pv_is_protected(kvm))
2548 * FMT 4 SIE needs esca. As we never switch back to bsca from
2549 * esca, we need no cleanup in the error cases below
2551 r = sca_switch_to_extended(kvm);
2555 mmap_write_lock(current->mm);
2556 r = gmap_mark_unmergeable();
2557 mmap_write_unlock(current->mm);
2561 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2565 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2567 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2569 /* we need to block service interrupts from now on */
2570 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2573 case KVM_PV_ASYNC_CLEANUP_PREPARE:
2575 if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2578 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2580 * If a CPU could not be destroyed, the destroy-VM call will also fail.
2581 * There is no point in attempting it. Instead, return
2582 * the rc and rrc of the first CPU that failed to be destroyed.
2586 r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2588 /* no need to block service interrupts any more */
2589 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2591 case KVM_PV_ASYNC_CLEANUP_PERFORM:
2595 /* kvm->lock must not be held; this is asserted inside the function. */
2596 r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2598 case KVM_PV_DISABLE: {
2600 if (!kvm_s390_pv_is_protected(kvm))
2603 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2605 * If a CPU could not be destroyed, the destroy-VM call will also fail.
2606 * There is no point in attempting it. Instead, return
2607 * the rc and rrc of the first CPU that failed to be destroyed.
2611 r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2613 /* no need to block service interrupts any more */
2614 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2617 case KVM_PV_SET_SEC_PARMS: {
2618 struct kvm_s390_pv_sec_parm parms = {};
2622 if (!kvm_s390_pv_is_protected(kvm))
2626 if (copy_from_user(&parms, argp, sizeof(parms)))
2629 /* Currently restricted to 8KB */
2631 if (parms.length > PAGE_SIZE * 2)
2635 hdr = vmalloc(parms.length);
2640 if (!copy_from_user(hdr, (void __user *)parms.origin,
2642 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2643 &cmd->rc, &cmd->rrc);
2648 case KVM_PV_UNPACK: {
2649 struct kvm_s390_pv_unp unp = {};
2652 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2656 if (copy_from_user(&unp, argp, sizeof(unp)))
2659 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2660 &cmd->rc, &cmd->rrc);
2663 case KVM_PV_VERIFY: {
2665 if (!kvm_s390_pv_is_protected(kvm))
2668 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2669 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2670 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2674 case KVM_PV_PREP_RESET: {
2676 if (!kvm_s390_pv_is_protected(kvm))
2679 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2680 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2681 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2685 case KVM_PV_UNSHARE_ALL: {
2687 if (!kvm_s390_pv_is_protected(kvm))
2690 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2691 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2692 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2697 struct kvm_s390_pv_info info = {};
2701 * No need to check the VM protection here.
2703 * Maybe user space wants to query some of the data
2704 * when the VM is still unprotected. If we see the
2705 * need to fence a new data command we can still
2706 * return an error in the info handler.
2710 if (copy_from_user(&info, argp, sizeof(info.header)))
2714 if (info.header.len_max < sizeof(info.header))
2717 data_len = kvm_s390_handle_pv_info(&info);
2723 * If a data command struct is extended (multiple
2724 * times) this can be used to determine how much of it
2727 info.header.len_written = data_len;
2730 if (copy_to_user(argp, &info, data_len))
2737 struct kvm_s390_pv_dmp dmp;
2740 if (!kvm_s390_pv_is_protected(kvm))
2744 if (copy_from_user(&dmp, argp, sizeof(dmp)))
2747 r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2751 if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2762 mutex_unlock(&kvm->lock);
2767 static bool access_key_invalid(u8 access_key)
2769 return access_key > 0xf;
2772 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2774 void __user *uaddr = (void __user *)mop->buf;
2775 u64 supported_flags;
2776 void *tmpbuf = NULL;
2779 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2780 | KVM_S390_MEMOP_F_CHECK_ONLY;
2781 if (mop->flags & ~supported_flags || !mop->size)
2783 if (mop->size > MEM_OP_MAX_SIZE)
2786 * This is technically only a heuristic; if kvm->lock is not
2787 * taken, it is not guaranteed that the vm is/remains non-protected.
2788 * This is ok from a kernel perspective: wrongdoing is detected
2789 * on the access, -EFAULT is returned, and the vm may crash the
2790 * next time it accesses the memory in question.
2791 * There is no sane use case for doing the switch and a memop on two
2792 * different CPUs at the same time.
2794 if (kvm_s390_pv_get_handle(kvm))
2796 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2797 if (access_key_invalid(mop->key))
2802 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2803 tmpbuf = vmalloc(mop->size);
2808 srcu_idx = srcu_read_lock(&kvm->srcu);
2810 if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2816 case KVM_S390_MEMOP_ABSOLUTE_READ: {
2817 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2818 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2820 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2821 mop->size, GACC_FETCH, mop->key);
2823 if (copy_to_user(uaddr, tmpbuf, mop->size))
2829 case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2830 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2831 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2833 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2837 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2838 mop->size, GACC_STORE, mop->key);
2847 srcu_read_unlock(&kvm->srcu, srcu_idx);
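/*
 * Illustrative user space read of guest absolute memory (sketch only; the
 * struct follows the kvm_s390_mem_op UAPI layout as assumed here):
 *
 *	__u8 buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf = (__u64)(unsigned long)buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 *
 * Adding KVM_S390_MEMOP_F_SKEY_PROTECTION together with a 4-bit key in
 * op.key makes the access subject to storage key protection, while
 * KVM_S390_MEMOP_F_CHECK_ONLY performs the permission check without
 * transferring any data, matching the two branches handled above.
 */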
2853 long kvm_arch_vm_ioctl(struct file *filp,
2854 unsigned int ioctl, unsigned long arg)
2856 struct kvm *kvm = filp->private_data;
2857 void __user *argp = (void __user *)arg;
2858 struct kvm_device_attr attr;
2862 case KVM_S390_INTERRUPT: {
2863 struct kvm_s390_interrupt s390int;
2866 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2868 r = kvm_s390_inject_vm(kvm, &s390int);
2871 case KVM_CREATE_IRQCHIP: {
2872 struct kvm_irq_routing_entry routing;
2875 if (kvm->arch.use_irqchip) {
2876 /* Set up dummy routing. */
2877 memset(&routing, 0, sizeof(routing));
2878 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2882 case KVM_SET_DEVICE_ATTR: {
2884 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2886 r = kvm_s390_vm_set_attr(kvm, &attr);
2889 case KVM_GET_DEVICE_ATTR: {
2891 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2893 r = kvm_s390_vm_get_attr(kvm, &attr);
2896 case KVM_HAS_DEVICE_ATTR: {
2898 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2900 r = kvm_s390_vm_has_attr(kvm, &attr);
2903 case KVM_S390_GET_SKEYS: {
2904 struct kvm_s390_skeys args;
2907 if (copy_from_user(&args, argp,
2908 sizeof(struct kvm_s390_skeys)))
2910 r = kvm_s390_get_skeys(kvm, &args);
2913 case KVM_S390_SET_SKEYS: {
2914 struct kvm_s390_skeys args;
2917 if (copy_from_user(&args, argp,
2918 sizeof(struct kvm_s390_skeys)))
2920 r = kvm_s390_set_skeys(kvm, &args);
2923 case KVM_S390_GET_CMMA_BITS: {
2924 struct kvm_s390_cmma_log args;
2927 if (copy_from_user(&args, argp, sizeof(args)))
2929 mutex_lock(&kvm->slots_lock);
2930 r = kvm_s390_get_cmma_bits(kvm, &args);
2931 mutex_unlock(&kvm->slots_lock);
2933 r = copy_to_user(argp, &args, sizeof(args));
2939 case KVM_S390_SET_CMMA_BITS: {
2940 struct kvm_s390_cmma_log args;
2943 if (copy_from_user(&args, argp, sizeof(args)))
2945 mutex_lock(&kvm->slots_lock);
2946 r = kvm_s390_set_cmma_bits(kvm, &args);
2947 mutex_unlock(&kvm->slots_lock);
2950 case KVM_S390_PV_COMMAND: {
2951 struct kvm_pv_cmd args;
2953 /* protvirt means user cpu state */
2954 kvm_s390_set_user_cpu_state_ctrl(kvm);
2956 if (!is_prot_virt_host()) {
2960 if (copy_from_user(&args, argp, sizeof(args))) {
2968 /* must be called without kvm->lock */
2969 r = kvm_s390_handle_pv(kvm, &args);
2970 if (copy_to_user(argp, &args, sizeof(args))) {
2976 case KVM_S390_MEM_OP: {
2977 struct kvm_s390_mem_op mem_op;
2979 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2980 r = kvm_s390_vm_mem_op(kvm, &mem_op);
2985 case KVM_S390_ZPCI_OP: {
2986 struct kvm_s390_zpci_op args;
2989 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2991 if (copy_from_user(&args, argp, sizeof(args))) {
2995 r = kvm_s390_pci_zpci_op(kvm, &args);
3005 static int kvm_s390_apxa_installed(void)
3007 struct ap_config_info info;
3009 if (ap_instructions_available()) {
3010 if (ap_qci(&info) == 0)
3018 * The format of the crypto control block (CRYCB) is specified in the 3 low
3019 * order bits of the CRYCB designation (CRYCBD) field as follows:
3020 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3021 * AP extended addressing (APXA) facility are installed.
3022 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
3023 * Format 2: Both the APXA and MSAX3 facilities are installed
3025 static void kvm_s390_set_crycb_format(struct kvm *kvm)
3027 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
3029 /* Clear the CRYCB format bits - i.e., set format 0 by default */
3030 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3032 /* Check whether MSAX3 is installed */
3033 if (!test_kvm_facility(kvm, 76))
3036 if (kvm_s390_apxa_installed())
3037 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3039 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3043 * kvm_arch_crypto_set_masks
3045 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3047 * @apm: the mask identifying the accessible AP adapters
3048 * @aqm: the mask identifying the accessible AP domains
3049 * @adm: the mask identifying the accessible AP control domains
3051 * Set the masks that identify the adapters, domains and control domains to
3052 * which the KVM guest is granted access.
3054 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3057 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3058 unsigned long *aqm, unsigned long *adm)
3060 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3062 kvm_s390_vcpu_block_all(kvm);
3064 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3065 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3066 memcpy(crycb->apcb1.apm, apm, 32);
3067 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3068 apm[0], apm[1], apm[2], apm[3]);
3069 memcpy(crycb->apcb1.aqm, aqm, 32);
3070 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3071 aqm[0], aqm[1], aqm[2], aqm[3]);
3072 memcpy(crycb->apcb1.adm, adm, 32);
3073 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3074 adm[0], adm[1], adm[2], adm[3]);
3077 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3078 memcpy(crycb->apcb0.apm, apm, 8);
3079 memcpy(crycb->apcb0.aqm, aqm, 2);
3080 memcpy(crycb->apcb0.adm, adm, 2);
3081 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3082 apm[0], *((unsigned short *)aqm),
3083 *((unsigned short *)adm));
3085 default: /* Can not happen */
3089 /* recreate the shadow crycb for each vcpu */
3090 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3091 kvm_s390_vcpu_unblock_all(kvm);
3093 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
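/*
 * kvm_arch_crypto_set_masks() is exported for the AP pass-through layer
 * (e.g. the vfio_ap driver), which hands adapters and domains to a guest
 * roughly like this (illustrative sketch only, hypothetical caller-side
 * names, with kvm->lock held as required by the comment above):
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 *	mutex_unlock(&kvm->lock);
 *
 * The KVM_REQ_VSIE_RESTART broadcast makes sure nested (vSIE) guests pick up
 * the new shadow CRYCB as well.
 */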
3096 * kvm_arch_crypto_clear_masks
3098 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3101 * Clear the masks that identify the adapters, domains and control domains to
3102 * which the KVM guest is granted access.
3104 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3107 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3109 kvm_s390_vcpu_block_all(kvm);
3111 memset(&kvm->arch.crypto.crycb->apcb0, 0,
3112 sizeof(kvm->arch.crypto.crycb->apcb0));
3113 memset(&kvm->arch.crypto.crycb->apcb1, 0,
3114 sizeof(kvm->arch.crypto.crycb->apcb1));
3116 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3117 /* recreate the shadow crycb for each vcpu */
3118 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3119 kvm_s390_vcpu_unblock_all(kvm);
3121 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3123 static u64 kvm_s390_get_initial_cpuid(void)
3128 cpuid.version = 0xff;
3129 return *((u64 *) &cpuid);
3132 static void kvm_s390_crypto_init(struct kvm *kvm)
3134 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3135 kvm_s390_set_crycb_format(kvm);
3136 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3138 if (!test_kvm_facility(kvm, 76))
3141 /* Enable AES/DEA protected key functions by default */
3142 kvm->arch.crypto.aes_kw = 1;
3143 kvm->arch.crypto.dea_kw = 1;
3144 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3145 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3146 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3147 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3150 static void sca_dispose(struct kvm *kvm)
3152 if (kvm->arch.use_esca)
3153 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3155 free_page((unsigned long)(kvm->arch.sca));
3156 kvm->arch.sca = NULL;
3159 void kvm_arch_free_vm(struct kvm *kvm)
3161 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3162 kvm_s390_pci_clear_list(kvm);
3164 __kvm_arch_free_vm(kvm);
3167 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3169 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3171 char debug_name[16];
3172 static unsigned long sca_offset;
3175 #ifdef CONFIG_KVM_S390_UCONTROL
3176 if (type & ~KVM_VM_S390_UCONTROL)
3178 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3185 rc = s390_enable_sie();
3191 if (!sclp.has_64bscao)
3192 alloc_flags |= GFP_DMA;
3193 rwlock_init(&kvm->arch.sca_lock);
3194 /* start with basic SCA */
3195 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3198 mutex_lock(&kvm_lock);
3200 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3202 kvm->arch.sca = (struct bsca_block *)
3203 ((char *) kvm->arch.sca + sca_offset);
3204 mutex_unlock(&kvm_lock);
3206 sprintf(debug_name, "kvm-%u", current->pid);
3208 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3212 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3213 kvm->arch.sie_page2 =
3214 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3215 if (!kvm->arch.sie_page2)
3218 kvm->arch.sie_page2->kvm = kvm;
3219 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3221 for (i = 0; i < kvm_s390_fac_size(); i++) {
3222 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3223 (kvm_s390_fac_base[i] |
3224 kvm_s390_fac_ext[i]);
3225 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3226 kvm_s390_fac_base[i];
3228 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3230 /* we are always in czam mode - even on pre z14 machines */
3231 set_kvm_facility(kvm->arch.model.fac_mask, 138);
3232 set_kvm_facility(kvm->arch.model.fac_list, 138);
3233 /* we emulate STHYI in kvm */
3234 set_kvm_facility(kvm->arch.model.fac_mask, 74);
3235 set_kvm_facility(kvm->arch.model.fac_list, 74);
3236 if (MACHINE_HAS_TLB_GUEST) {
3237 set_kvm_facility(kvm->arch.model.fac_mask, 147);
3238 set_kvm_facility(kvm->arch.model.fac_list, 147);
3241 if (css_general_characteristics.aiv && test_facility(65))
3242 set_kvm_facility(kvm->arch.model.fac_mask, 65);
3244 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3245 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3247 kvm_s390_crypto_init(kvm);
3249 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3250 mutex_lock(&kvm->lock);
3251 kvm_s390_pci_init_list(kvm);
3252 kvm_s390_vcpu_pci_enable_interp(kvm);
3253 mutex_unlock(&kvm->lock);
3256 mutex_init(&kvm->arch.float_int.ais_lock);
3257 spin_lock_init(&kvm->arch.float_int.lock);
3258 for (i = 0; i < FIRQ_LIST_COUNT; i++)
3259 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3260 init_waitqueue_head(&kvm->arch.ipte_wq);
3261 mutex_init(&kvm->arch.ipte_mutex);
3263 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3264 VM_EVENT(kvm, 3, "vm created with type %lu", type);
3266 if (type & KVM_VM_S390_UCONTROL) {
3267 kvm->arch.gmap = NULL;
3268 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3270 if (sclp.hamax == U64_MAX)
3271 kvm->arch.mem_limit = TASK_SIZE_MAX;
3273 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3275 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3276 if (!kvm->arch.gmap)
3278 kvm->arch.gmap->private = kvm;
3279 kvm->arch.gmap->pfault_enabled = 0;
3282 kvm->arch.use_pfmfi = sclp.has_pfmfi;
3283 kvm->arch.use_skf = sclp.has_skey;
3284 spin_lock_init(&kvm->arch.start_stop_lock);
3285 kvm_s390_vsie_init(kvm);
3287 kvm_s390_gisa_init(kvm);
3288 INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3289 kvm->arch.pv.set_aside = NULL;
3290 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3294 free_page((unsigned long)kvm->arch.sie_page2);
3295 debug_unregister(kvm->arch.dbf);
3297 KVM_EVENT(3, "creation of vm failed: %d", rc);
3301 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3305 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3306 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3307 kvm_s390_clear_local_irqs(vcpu);
3308 kvm_clear_async_pf_completion_queue(vcpu);
3309 if (!kvm_is_ucontrol(vcpu->kvm))
3311 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3313 if (kvm_is_ucontrol(vcpu->kvm))
3314 gmap_remove(vcpu->arch.gmap);
3316 if (vcpu->kvm->arch.use_cmma)
3317 kvm_s390_vcpu_unsetup_cmma(vcpu);
3318 /* We cannot hold the vcpu mutex here; we are already dying */
3319 if (kvm_s390_pv_cpu_get_handle(vcpu))
3320 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3321 free_page((unsigned long)(vcpu->arch.sie_block));
3324 void kvm_arch_destroy_vm(struct kvm *kvm)
3328 kvm_destroy_vcpus(kvm);
3330 kvm_s390_gisa_destroy(kvm);
3332 * We are already at the end of life and kvm->lock is not taken.
3333 * This is ok as the file descriptor is closed by now and nobody
3334 * can mess with the pv state.
3336 kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3338 * Remove the mmu notifier only when the whole KVM VM is torn down,
3339 * and only if one was registered to begin with. If the VM is
3340 * currently not protected, but has previously been protected,
3341 * then it's possible that the notifier is still registered.
3343 if (kvm->arch.pv.mmu_notifier.ops)
3344 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3346 debug_unregister(kvm->arch.dbf);
3347 free_page((unsigned long)kvm->arch.sie_page2);
3348 if (!kvm_is_ucontrol(kvm))
3349 gmap_remove(kvm->arch.gmap);
3350 kvm_s390_destroy_adapters(kvm);
3351 kvm_s390_clear_float_irqs(kvm);
3352 kvm_s390_vsie_destroy(kvm);
3353 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3356 /* Section: vcpu related */
3357 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3359 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3360 if (!vcpu->arch.gmap)
3362 vcpu->arch.gmap->private = vcpu->kvm;
3367 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3369 if (!kvm_s390_use_sca_entries())
3371 read_lock(&vcpu->kvm->arch.sca_lock);
3372 if (vcpu->kvm->arch.use_esca) {
3373 struct esca_block *sca = vcpu->kvm->arch.sca;
3375 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3376 sca->cpu[vcpu->vcpu_id].sda = 0;
3378 struct bsca_block *sca = vcpu->kvm->arch.sca;
3380 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3381 sca->cpu[vcpu->vcpu_id].sda = 0;
3383 read_unlock(&vcpu->kvm->arch.sca_lock);
3386 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3388 if (!kvm_s390_use_sca_entries()) {
3389 phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3391 /* we still need the basic sca for the ipte control */
3392 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3393 vcpu->arch.sie_block->scaol = sca_phys;
3396 read_lock(&vcpu->kvm->arch.sca_lock);
3397 if (vcpu->kvm->arch.use_esca) {
3398 struct esca_block *sca = vcpu->kvm->arch.sca;
3399 phys_addr_t sca_phys = virt_to_phys(sca);
3401 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3402 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3403 vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3404 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3405 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3407 struct bsca_block *sca = vcpu->kvm->arch.sca;
3408 phys_addr_t sca_phys = virt_to_phys(sca);
3410 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3411 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3412 vcpu->arch.sie_block->scaol = sca_phys;
3413 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3415 read_unlock(&vcpu->kvm->arch.sca_lock);
3418 /* Basic SCA to Extended SCA data copy routines */
3419 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3422 d->sigp_ctrl.c = s->sigp_ctrl.c;
3423 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3426 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3430 d->ipte_control = s->ipte_control;
3432 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3433 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3436 static int sca_switch_to_extended(struct kvm *kvm)
3438 struct bsca_block *old_sca = kvm->arch.sca;
3439 struct esca_block *new_sca;
3440 struct kvm_vcpu *vcpu;
3441 unsigned long vcpu_idx;
3443 phys_addr_t new_sca_phys;
3445 if (kvm->arch.use_esca)
3448 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3452 new_sca_phys = virt_to_phys(new_sca);
3453 scaoh = new_sca_phys >> 32;
3454 scaol = new_sca_phys & ESCA_SCAOL_MASK;
3456 kvm_s390_vcpu_block_all(kvm);
3457 write_lock(&kvm->arch.sca_lock);
3459 sca_copy_b_to_e(new_sca, old_sca);
3461 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3462 vcpu->arch.sie_block->scaoh = scaoh;
3463 vcpu->arch.sie_block->scaol = scaol;
3464 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3466 kvm->arch.sca = new_sca;
3467 kvm->arch.use_esca = 1;
3469 write_unlock(&kvm->arch.sca_lock);
3470 kvm_s390_vcpu_unblock_all(kvm);
3472 free_page((unsigned long)old_sca);
3474 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3475 old_sca, kvm->arch.sca);
3479 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3483 if (!kvm_s390_use_sca_entries()) {
3484 if (id < KVM_MAX_VCPUS)
3488 if (id < KVM_S390_BSCA_CPU_SLOTS)
3490 if (!sclp.has_esca || !sclp.has_64bscao)
3493 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3495 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3498 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3499 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3501 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3502 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3503 vcpu->arch.cputm_start = get_tod_clock_fast();
3504 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3507 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3508 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3510 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3511 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3512 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3513 vcpu->arch.cputm_start = 0;
3514 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3517 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3518 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3520 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3521 vcpu->arch.cputm_enabled = true;
3522 __start_cpu_timer_accounting(vcpu);
3525 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3526 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3528 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3529 __stop_cpu_timer_accounting(vcpu);
3530 vcpu->arch.cputm_enabled = false;
3533 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3535 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3536 __enable_cpu_timer_accounting(vcpu);
3540 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3542 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3543 __disable_cpu_timer_accounting(vcpu);
3547 /* set the cpu timer - may only be called from the VCPU thread itself */
3548 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3550 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3551 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3552 if (vcpu->arch.cputm_enabled)
3553 vcpu->arch.cputm_start = get_tod_clock_fast();
3554 vcpu->arch.sie_block->cputm = cputm;
3555 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3559 /* update and get the cpu timer - can also be called from other VCPU threads */
3560 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3565 if (unlikely(!vcpu->arch.cputm_enabled))
3566 return vcpu->arch.sie_block->cputm;
3568 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3570 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3572 * If the writer would ever execute a read in the critical
3573 * section, e.g. in irq context, we have a deadlock.
3575 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3576 value = vcpu->arch.sie_block->cputm;
3577 /* if cputm_start is 0, accounting is being started/stopped */
3578 if (likely(vcpu->arch.cputm_start))
3579 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3580 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
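/*
 * The cpu timer uses a classic seqcount scheme: the vcpu thread is the only
 * writer and brackets every update with raw_write_seqcount_begin/end, while
 * readers retry until they observe an unchanged, even sequence. A generic
 * sketch of the reader side (not tied to this file):
 *
 *	unsigned int seq;
 *	u64 value;
 *
 *	do {
 *		seq = raw_read_seqcount(&sc);
 *		value = shared_value;
 *	} while (read_seqcount_retry(&sc, seq & ~1));
 *
 * Masking the low bit of seq in the retry, as kvm_s390_get_cpu_timer() does,
 * guarantees another iteration whenever the snapshot was taken while the
 * writer was active (odd sequence), instead of returning a torn value.
 */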
3585 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3588 gmap_enable(vcpu->arch.enabled_gmap);
3589 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3590 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3591 __start_cpu_timer_accounting(vcpu);
3595 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3598 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3599 __stop_cpu_timer_accounting(vcpu);
3600 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3601 vcpu->arch.enabled_gmap = gmap_get_enabled();
3602 gmap_disable(vcpu->arch.enabled_gmap);
3606 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3608 mutex_lock(&vcpu->kvm->lock);
3610 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3611 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3613 mutex_unlock(&vcpu->kvm->lock);
3614 if (!kvm_is_ucontrol(vcpu->kvm)) {
3615 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3618 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3619 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3620 /* make vcpu_load load the right gmap on the first trigger */
3621 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3624 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3626 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3627 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3632 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3634 /* At least one ECC subfunction must be present */
3635 return kvm_has_pckmo_subfunc(kvm, 32) ||
3636 kvm_has_pckmo_subfunc(kvm, 33) ||
3637 kvm_has_pckmo_subfunc(kvm, 34) ||
3638 kvm_has_pckmo_subfunc(kvm, 40) ||
3639 kvm_has_pckmo_subfunc(kvm, 41);
3643 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3646 * If the AP instructions are not being interpreted and the MSAX3
3647 * facility is not configured for the guest, there is nothing to set up.
3649 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3652 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3653 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3654 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3655 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3657 if (vcpu->kvm->arch.crypto.apie)
3658 vcpu->arch.sie_block->eca |= ECA_APIE;
3660 /* Set up protected key support */
3661 if (vcpu->kvm->arch.crypto.aes_kw) {
3662 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3663 /* ecc is also wrapped with AES key */
3664 if (kvm_has_pckmo_ecc(vcpu->kvm))
3665 vcpu->arch.sie_block->ecd |= ECD_ECC;
3668 if (vcpu->kvm->arch.crypto.dea_kw)
3669 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3672 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3674 free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3675 vcpu->arch.sie_block->cbrlo = 0;
3678 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3680 void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3685 vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3689 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3691 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3693 vcpu->arch.sie_block->ibc = model->ibc;
3694 if (test_kvm_facility(vcpu->kvm, 7))
3695 vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3698 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3703 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3707 if (test_kvm_facility(vcpu->kvm, 78))
3708 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3709 else if (test_kvm_facility(vcpu->kvm, 8))
3710 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3712 kvm_s390_vcpu_setup_model(vcpu);
3714 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3715 if (MACHINE_HAS_ESOP)
3716 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3717 if (test_kvm_facility(vcpu->kvm, 9))
3718 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3719 if (test_kvm_facility(vcpu->kvm, 11))
3720 vcpu->arch.sie_block->ecb |= ECB_PTF;
3721 if (test_kvm_facility(vcpu->kvm, 73))
3722 vcpu->arch.sie_block->ecb |= ECB_TE;
3723 if (!kvm_is_ucontrol(vcpu->kvm))
3724 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3726 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3727 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3728 if (test_kvm_facility(vcpu->kvm, 130))
3729 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3730 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3732 vcpu->arch.sie_block->eca |= ECA_CEI;
3734 vcpu->arch.sie_block->eca |= ECA_IB;
3736 vcpu->arch.sie_block->eca |= ECA_SII;
3737 if (sclp.has_sigpif)
3738 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3739 if (test_kvm_facility(vcpu->kvm, 129)) {
3740 vcpu->arch.sie_block->eca |= ECA_VX;
3741 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3743 if (test_kvm_facility(vcpu->kvm, 139))
3744 vcpu->arch.sie_block->ecd |= ECD_MEF;
3745 if (test_kvm_facility(vcpu->kvm, 156))
3746 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3747 if (vcpu->arch.sie_block->gd) {
3748 vcpu->arch.sie_block->eca |= ECA_AIV;
3749 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3750 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3752 vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3753 vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3756 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3758 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3760 if (vcpu->kvm->arch.use_cmma) {
3761 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3765 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3766 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3768 vcpu->arch.sie_block->hpid = HPID_KVM;
3770 kvm_s390_vcpu_crypto_setup(vcpu);
3772 kvm_s390_vcpu_pci_setup(vcpu);
3774 mutex_lock(&vcpu->kvm->lock);
3775 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3776 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3778 kvm_s390_vcpu_unsetup_cmma(vcpu);
3780 mutex_unlock(&vcpu->kvm->lock);
3785 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3787 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3792 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3794 struct sie_page *sie_page;
3797 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3798 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3802 vcpu->arch.sie_block = &sie_page->sie_block;
3803 vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3805 /* the real guest size will always be smaller than msl */
3806 vcpu->arch.sie_block->mso = 0;
3807 vcpu->arch.sie_block->msl = sclp.hamax;
3809 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3810 spin_lock_init(&vcpu->arch.local_int.lock);
3811 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3812 seqcount_init(&vcpu->arch.cputm_seqcount);
3814 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3815 kvm_clear_async_pf_completion_queue(vcpu);
3816 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3823 kvm_s390_set_prefix(vcpu, 0);
3824 if (test_kvm_facility(vcpu->kvm, 64))
3825 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3826 if (test_kvm_facility(vcpu->kvm, 82))
3827 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3828 if (test_kvm_facility(vcpu->kvm, 133))
3829 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3830 if (test_kvm_facility(vcpu->kvm, 156))
3831 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3832 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3833 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3836 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3838 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3840 if (kvm_is_ucontrol(vcpu->kvm)) {
3841 rc = __kvm_ucontrol_vcpu_init(vcpu);
3843 goto out_free_sie_block;
3846 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3847 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3848 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3850 rc = kvm_s390_vcpu_setup(vcpu);
3852 goto out_ucontrol_uninit;
3854 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3857 out_ucontrol_uninit:
3858 if (kvm_is_ucontrol(vcpu->kvm))
3859 gmap_remove(vcpu->arch.gmap);
3861 free_page((unsigned long)(vcpu->arch.sie_block));
3865 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3867 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3868 return kvm_s390_vcpu_has_irq(vcpu, 0);
3871 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3873 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3876 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3878 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3882 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3884 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3887 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3889 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3893 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3895 return atomic_read(&vcpu->arch.sie_block->prog20) &
3896 (PROG_BLOCK_SIE | PROG_REQUEST);
3899 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3901 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3905 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3906 * If the CPU is not running (e.g. waiting as idle) the function will
3907 * return immediately. */
3908 void exit_sie(struct kvm_vcpu *vcpu)
3910 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3911 kvm_s390_vsie_kick(vcpu);
3912 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3916 /* Kick a guest cpu out of SIE to process a request synchronously */
3917 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3919 __kvm_make_request(req, vcpu);
3920 kvm_s390_vcpu_request(vcpu);
3923 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3926 struct kvm *kvm = gmap->private;
3927 struct kvm_vcpu *vcpu;
3928 unsigned long prefix;
3931 if (gmap_is_shadow(gmap))
3933 if (start >= 1UL << 31)
3934 /* We are only interested in prefix pages */
3936 kvm_for_each_vcpu(i, vcpu, kvm) {
3937 /* match against both prefix pages */
3938 prefix = kvm_s390_get_prefix(vcpu);
3939 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3940 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3942 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3947 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3949 /* do not poll with more than halt_poll_max_steal percent of steal time */
3950 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3951 READ_ONCE(halt_poll_max_steal)) {
3952 vcpu->stat.halt_no_poll_steal++;
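/*
 * The steal-time check above works in TOD-clock resolution: one microsecond
 * corresponds to 4096 (1 << 12) clock units, so TICK_USEC << 12 is the
 * length of one accounting tick in those units. avg_steal_timer is the
 * averaged steal time per tick in the same resolution, hence
 *
 *	avg_steal_timer * 100 / (TICK_USEC << 12)
 *
 * is the steal time as a percentage of a tick, compared against the
 * halt_poll_max_steal module parameter (a rough summary; the averaging of
 * avg_steal_timer itself happens elsewhere in the lowcore handling).
 */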
3958 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3960 /* kvm common code refers to this, but never calls it */
3965 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3966 struct kvm_one_reg *reg)
3971 case KVM_REG_S390_TODPR:
3972 r = put_user(vcpu->arch.sie_block->todpr,
3973 (u32 __user *)reg->addr);
3975 case KVM_REG_S390_EPOCHDIFF:
3976 r = put_user(vcpu->arch.sie_block->epoch,
3977 (u64 __user *)reg->addr);
3979 case KVM_REG_S390_CPU_TIMER:
3980 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3981 (u64 __user *)reg->addr);
3983 case KVM_REG_S390_CLOCK_COMP:
3984 r = put_user(vcpu->arch.sie_block->ckc,
3985 (u64 __user *)reg->addr);
3987 case KVM_REG_S390_PFTOKEN:
3988 r = put_user(vcpu->arch.pfault_token,
3989 (u64 __user *)reg->addr);
3991 case KVM_REG_S390_PFCOMPARE:
3992 r = put_user(vcpu->arch.pfault_compare,
3993 (u64 __user *)reg->addr);
3995 case KVM_REG_S390_PFSELECT:
3996 r = put_user(vcpu->arch.pfault_select,
3997 (u64 __user *)reg->addr);
3999 case KVM_REG_S390_PP:
4000 r = put_user(vcpu->arch.sie_block->pp,
4001 (u64 __user *)reg->addr);
4003 case KVM_REG_S390_GBEA:
4004 r = put_user(vcpu->arch.sie_block->gbea,
4005 (u64 __user *)reg->addr);
4014 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4015 struct kvm_one_reg *reg)
4021 case KVM_REG_S390_TODPR:
4022 r = get_user(vcpu->arch.sie_block->todpr,
4023 (u32 __user *)reg->addr);
4025 case KVM_REG_S390_EPOCHDIFF:
4026 r = get_user(vcpu->arch.sie_block->epoch,
4027 (u64 __user *)reg->addr);
4029 case KVM_REG_S390_CPU_TIMER:
4030 r = get_user(val, (u64 __user *)reg->addr);
4032 kvm_s390_set_cpu_timer(vcpu, val);
4034 case KVM_REG_S390_CLOCK_COMP:
4035 r = get_user(vcpu->arch.sie_block->ckc,
4036 (u64 __user *)reg->addr);
4038 case KVM_REG_S390_PFTOKEN:
4039 r = get_user(vcpu->arch.pfault_token,
4040 (u64 __user *)reg->addr);
4041 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4042 kvm_clear_async_pf_completion_queue(vcpu);
4044 case KVM_REG_S390_PFCOMPARE:
4045 r = get_user(vcpu->arch.pfault_compare,
4046 (u64 __user *)reg->addr);
4048 case KVM_REG_S390_PFSELECT:
4049 r = get_user(vcpu->arch.pfault_select,
4050 (u64 __user *)reg->addr);
4052 case KVM_REG_S390_PP:
4053 r = get_user(vcpu->arch.sie_block->pp,
4054 (u64 __user *)reg->addr);
4056 case KVM_REG_S390_GBEA:
4057 r = get_user(vcpu->arch.sie_block->gbea,
4058 (u64 __user *)reg->addr);
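/*
 * The ONE_REG accessors above cover the handful of s390 registers that are
 * not part of the sync regs. Illustrative user space access (sketch only;
 * struct kvm_one_reg is the generic KVM UAPI layout):
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&cputm,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */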
4067 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4069 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4070 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4071 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4073 kvm_clear_async_pf_completion_queue(vcpu);
4074 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4075 kvm_s390_vcpu_stop(vcpu);
4076 kvm_s390_clear_local_irqs(vcpu);
4079 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4081 /* Initial reset is a superset of the normal reset */
4082 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4085 * This equals initial cpu reset in pop, but we don't switch to ESA.
4086 * We do not only reset the internal data, but also ...
4088 vcpu->arch.sie_block->gpsw.mask = 0;
4089 vcpu->arch.sie_block->gpsw.addr = 0;
4090 kvm_s390_set_prefix(vcpu, 0);
4091 kvm_s390_set_cpu_timer(vcpu, 0);
4092 vcpu->arch.sie_block->ckc = 0;
4093 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4094 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4095 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4097 /* ... the data in sync regs */
4098 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4099 vcpu->run->s.regs.ckc = 0;
4100 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4101 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4102 vcpu->run->psw_addr = 0;
4103 vcpu->run->psw_mask = 0;
4104 vcpu->run->s.regs.todpr = 0;
4105 vcpu->run->s.regs.cputm = 0;
4106 vcpu->run->s.regs.ckc = 0;
4107 vcpu->run->s.regs.pp = 0;
4108 vcpu->run->s.regs.gbea = 1;
4109 vcpu->run->s.regs.fpc = 0;
4111 * Do not reset these registers in the protected case, as some of
4112 * them are overlaid and thus not accessible in this case
4115 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4116 vcpu->arch.sie_block->gbea = 1;
4117 vcpu->arch.sie_block->pp = 0;
4118 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4119 vcpu->arch.sie_block->todpr = 0;
4123 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4125 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4127 /* Clear reset is a superset of the initial reset */
4128 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4130 memset(&regs->gprs, 0, sizeof(regs->gprs));
4131 memset(&regs->vrs, 0, sizeof(regs->vrs));
4132 memset(&regs->acrs, 0, sizeof(regs->acrs));
4133 memset(&regs->gscb, 0, sizeof(regs->gscb));
4136 regs->etoken_extension = 0;
4139 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4142 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
4147 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4150 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4155 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4156 struct kvm_sregs *sregs)
4160 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4161 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4167 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4168 struct kvm_sregs *sregs)
4172 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4173 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4179 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4185 if (test_fp_ctl(fpu->fpc)) {
4189 vcpu->run->s.regs.fpc = fpu->fpc;
4191 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4192 (freg_t *) fpu->fprs);
4194 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4201 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4205 /* make sure we have the latest values */
4208 convert_vx_to_fp((freg_t *) fpu->fprs,
4209 (__vector128 *) vcpu->run->s.regs.vrs);
4211 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4212 fpu->fpc = vcpu->run->s.regs.fpc;
4218 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4222 if (!is_vcpu_stopped(vcpu))
4225 vcpu->run->psw_mask = psw.mask;
4226 vcpu->run->psw_addr = psw.addr;
4231 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4232 struct kvm_translation *tr)
4234 return -EINVAL; /* not implemented yet */
4237 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4238 KVM_GUESTDBG_USE_HW_BP | \
4239 KVM_GUESTDBG_ENABLE)
4241 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4242 struct kvm_guest_debug *dbg)
4248 vcpu->guest_debug = 0;
4249 kvm_s390_clear_bp_data(vcpu);
4251 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4255 if (!sclp.has_gpere) {
4260 if (dbg->control & KVM_GUESTDBG_ENABLE) {
4261 vcpu->guest_debug = dbg->control;
4262 /* enforce guest PER */
4263 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4265 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4266 rc = kvm_s390_import_bp_data(vcpu, dbg);
4268 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4269 vcpu->arch.guestdbg.last_bp = 0;
4273 vcpu->guest_debug = 0;
4274 kvm_s390_clear_bp_data(vcpu);
4275 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4283 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4284 struct kvm_mp_state *mp_state)
4290 /* CHECK_STOP and LOAD are not supported yet */
4291 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4292 KVM_MP_STATE_OPERATING;
4298 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4299 struct kvm_mp_state *mp_state)
4305 /* user space knows about this interface - let it control the state */
4306 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4308 switch (mp_state->mp_state) {
4309 case KVM_MP_STATE_STOPPED:
4310 rc = kvm_s390_vcpu_stop(vcpu);
4312 case KVM_MP_STATE_OPERATING:
4313 rc = kvm_s390_vcpu_start(vcpu);
4315 case KVM_MP_STATE_LOAD:
4316 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4320 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4322 case KVM_MP_STATE_CHECK_STOP:
4323 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
4332 static bool ibs_enabled(struct kvm_vcpu *vcpu)
4334 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4337 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4340 kvm_s390_vcpu_request_handled(vcpu);
4341 if (!kvm_request_pending(vcpu))
4344 * If the guest prefix changed, re-arm the ipte notifier for the
4345 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4346 * This ensures that the ipte instruction for this request has
4347 * already finished. We might race against a second unmapper that
4348 * wants to set the blocking bit. Let's just retry the request loop.
4350 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4352 rc = gmap_mprotect_notify(vcpu->arch.gmap,
4353 kvm_s390_get_prefix(vcpu),
4354 PAGE_SIZE * 2, PROT_WRITE);
4356 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4362 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4363 vcpu->arch.sie_block->ihcpu = 0xffff;
4367 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4368 if (!ibs_enabled(vcpu)) {
4369 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4370 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4375 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4376 if (ibs_enabled(vcpu)) {
4377 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4378 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4383 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4384 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4388 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4390 * Disable CMM virtualization; we will emulate the ESSA
4391 * instruction manually, in order to provide additional
4392 * functionalities needed for live migration.
4394 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4398 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4400 * Re-enable CMM virtualization if CMMA is available and
4401 * CMM has been used.
4403 if ((vcpu->kvm->arch.use_cmma) &&
4404 (vcpu->kvm->mm->context.uses_cmm))
4405 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4409 /* we left the vsie handler, nothing to do, just clear the request */
4410 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4415 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4417 struct kvm_vcpu *vcpu;
4418 union tod_clock clk;
4423 store_tod_clock_ext(&clk);
4425 kvm->arch.epoch = gtod->tod - clk.tod;
4427 if (test_kvm_facility(kvm, 139)) {
4428 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4429 if (kvm->arch.epoch > gtod->tod)
4430 kvm->arch.epdx -= 1;
4433 kvm_s390_vcpu_block_all(kvm);
4434 kvm_for_each_vcpu(i, vcpu, kvm) {
4435 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4436 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
4439 kvm_s390_vcpu_unblock_all(kvm);
4443 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4445 if (!mutex_trylock(&kvm->lock))
4447 __kvm_s390_set_tod_clock(kvm, gtod);
4448 mutex_unlock(&kvm->lock);
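/*
 * Illustrative sketch (not part of this file) of the epoch arithmetic above,
 * treating TOD values as plain 64-bit numbers. If the guest TOD requested by
 * userspace is smaller than the current host TOD, the 64-bit subtraction
 * wraps; with the multiple-epoch facility (139) the borrow is then propagated
 * into the epoch index:
 *
 *	u64 host_tod  = 0xfff0000000000000ULL;	// hypothetical clk.tod
 *	u64 guest_tod = 0x0010000000000000ULL;	// hypothetical gtod->tod
 *	u64 epoch = guest_tod - host_tod;	// wraps to 0x0020000000000000
 *	// epoch > guest_tod, so the epoch index is decremented by one,
 *	// exactly like the "epdx -= 1" above.
 */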
4453 * kvm_arch_fault_in_page - fault-in guest page if necessary
4454 * @vcpu: The corresponding virtual cpu
4455 * @gpa: Guest physical address
4456 * @writable: Whether the page should be writable or not
4458 * Make sure that a guest page has been faulted-in on the host.
4460 * Return: Zero on success, negative error code otherwise.
4462 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4464 return gmap_fault(vcpu->arch.gmap, gpa,
4465 writable ? FAULT_FLAG_WRITE : 0);
4468 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4469 unsigned long token)
4471 struct kvm_s390_interrupt inti;
4472 struct kvm_s390_irq irq;
4475 irq.u.ext.ext_params2 = token;
4476 irq.type = KVM_S390_INT_PFAULT_INIT;
4477 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4479 inti.type = KVM_S390_INT_PFAULT_DONE;
4480 inti.parm64 = token;
4481 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4485 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4486 struct kvm_async_pf *work)
4488 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4489 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4494 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4495 struct kvm_async_pf *work)
4497 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4498 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4501 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4502 struct kvm_async_pf *work)
4504 /* s390 will always inject the page directly */
4507 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4510 * s390 will always inject the page directly,
4511 * but we still want check_async_completion to clean up
4516 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4519 struct kvm_arch_async_pf arch;
4521 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4523 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4524 vcpu->arch.pfault_compare)
4526 if (psw_extint_disabled(vcpu))
4528 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4530 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4532 if (!vcpu->arch.gmap->pfault_enabled)
4535 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4536 hva += current->thread.gmap_addr & ~PAGE_MASK;
4537 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4540 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4543 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4548 * On s390 notifications for arriving pages will be delivered directly
4549 * to the guest but the housekeeping for completed pfaults is
4550 * handled outside the worker.
4552 kvm_check_async_pf_completion(vcpu);
4554 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4555 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4560 if (!kvm_is_ucontrol(vcpu->kvm)) {
4561 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4566 rc = kvm_s390_handle_requests(vcpu);
4570 if (guestdbg_enabled(vcpu)) {
4571 kvm_s390_backup_guest_per_regs(vcpu);
4572 kvm_s390_patch_guest_per_regs(vcpu);
4575 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4577 vcpu->arch.sie_block->icptcode = 0;
4578 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4579 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4580 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4585 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4587 struct kvm_s390_pgm_info pgm_info = {
4588 .code = PGM_ADDRESSING,
4593 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4594 trace_kvm_s390_sie_fault(vcpu);
4597 * We want to inject an addressing exception, which is defined as a
4598 * suppressing or terminating exception. However, since we came here
4599 * by a DAT access exception, the PSW still points to the faulting
4600 * instruction since DAT exceptions are nullifying. So we've got
4601 * to look up the current opcode to get the length of the instruction
4602 * to be able to forward the PSW.
4604 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4605 ilen = insn_length(opcode);
4609 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4610 * Forward by arbitrary ilc, injection will take care of
4611 * nullification if necessary.
4613 pgm_info = vcpu->arch.pgm;
4616 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4617 kvm_s390_forward_psw(vcpu, ilen);
4618 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4621 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4623 struct mcck_volatile_info *mcck_info;
4624 struct sie_page *sie_page;
4626 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4627 vcpu->arch.sie_block->icptcode);
4628 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4630 if (guestdbg_enabled(vcpu))
4631 kvm_s390_restore_guest_per_regs(vcpu);
4633 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4634 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4636 if (exit_reason == -EINTR) {
4637 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4638 sie_page = container_of(vcpu->arch.sie_block,
4639 struct sie_page, sie_block);
4640 mcck_info = &sie_page->mcck_info;
4641 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4645 if (vcpu->arch.sie_block->icptcode > 0) {
4646 int rc = kvm_handle_sie_intercept(vcpu);
4648 if (rc != -EOPNOTSUPP)
4650 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4651 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4652 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4653 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4655 } else if (exit_reason != -EFAULT) {
4656 vcpu->stat.exit_null++;
4658 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4659 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4660 vcpu->run->s390_ucontrol.trans_exc_code =
4661 current->thread.gmap_addr;
4662 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4664 } else if (current->thread.gmap_pfault) {
4665 trace_kvm_s390_major_guest_pfault(vcpu);
4666 current->thread.gmap_pfault = 0;
4667 if (kvm_arch_setup_async_pf(vcpu))
4669 vcpu->stat.pfault_sync++;
4670 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4672 return vcpu_post_run_fault_in_sie(vcpu);
4675 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4676 static int __vcpu_run(struct kvm_vcpu *vcpu)
4678 int rc, exit_reason;
4679 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4682 * We try to hold kvm->srcu during most of vcpu_run (except when running
4683 * the guest), so that memslots (and other stuff) are protected
4685 kvm_vcpu_srcu_read_lock(vcpu);
4688 rc = vcpu_pre_run(vcpu);
4692 kvm_vcpu_srcu_read_unlock(vcpu);
4694 * As PF_VCPU will be used in the fault handler, there must be no
4695 * uaccess between guest_enter and guest_exit.
4697 local_irq_disable();
4698 guest_enter_irqoff();
4699 __disable_cpu_timer_accounting(vcpu);
4701 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4702 memcpy(sie_page->pv_grregs,
4703 vcpu->run->s.regs.gprs,
4704 sizeof(sie_page->pv_grregs));
4706 if (test_cpu_flag(CIF_FPU))
4708 exit_reason = sie64a(vcpu->arch.sie_block,
4709 vcpu->run->s.regs.gprs);
4710 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4711 memcpy(vcpu->run->s.regs.gprs,
4712 sie_page->pv_grregs,
4713 sizeof(sie_page->pv_grregs));
4715 * We're not allowed to inject interrupts on intercepts
4716 * that leave the guest state in an "in-between" state
4717 * where the next SIE entry will do a continuation.
4718 * Fence interrupts in our "internal" PSW.
4720 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4721 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4722 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4725 local_irq_disable();
4726 __enable_cpu_timer_accounting(vcpu);
4727 guest_exit_irqoff();
4729 kvm_vcpu_srcu_read_lock(vcpu);
4731 rc = vcpu_post_run(vcpu, exit_reason);
4732 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4734 kvm_vcpu_srcu_read_unlock(vcpu);
4738 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4740 struct kvm_run *kvm_run = vcpu->run;
4741 struct runtime_instr_cb *riccb;
4744 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4745 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4746 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4747 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4748 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4749 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4750 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4751 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4753 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4754 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4755 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4756 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4757 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4758 kvm_clear_async_pf_completion_queue(vcpu);
4760 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4761 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4762 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4763 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4766 * If userspace sets the riccb (e.g. after migration) to a valid state,
4767 * we should enable RI here instead of doing the lazy enablement.
4769 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4770 test_kvm_facility(vcpu->kvm, 64) &&
4772 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4773 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4774 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4777 * If userspace sets the gscb (e.g. after migration) to non-zero,
4778 * we should enable GS here instead of doing the lazy enablement.
4780 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4781 test_kvm_facility(vcpu->kvm, 133) &&
4783 !vcpu->arch.gs_enabled) {
4784 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4785 vcpu->arch.sie_block->ecb |= ECB_GS;
4786 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4787 vcpu->arch.gs_enabled = 1;
4789 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4790 test_kvm_facility(vcpu->kvm, 82)) {
4791 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4792 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4794 if (MACHINE_HAS_GS) {
4796 __ctl_set_bit(2, 4);
4797 if (current->thread.gs_cb) {
4798 vcpu->arch.host_gscb = current->thread.gs_cb;
4799 save_gs_cb(vcpu->arch.host_gscb);
4801 if (vcpu->arch.gs_enabled) {
4802 current->thread.gs_cb = (struct gs_cb *)
4803 &vcpu->run->s.regs.gscb;
4804 restore_gs_cb(current->thread.gs_cb);
4808 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4811 static void sync_regs(struct kvm_vcpu *vcpu)
4813 struct kvm_run *kvm_run = vcpu->run;
4815 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4816 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4817 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4818 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4819 /* some control register changes require a tlb flush */
4820 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4822 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4823 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4824 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4826 save_access_regs(vcpu->arch.host_acrs);
4827 restore_access_regs(vcpu->run->s.regs.acrs);
4828 /* save host (userspace) fprs/vrs */
4830 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4831 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4833 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4835 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4836 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4837 if (test_fp_ctl(current->thread.fpu.fpc))
4838 /* User space provided an invalid FPC, let's clear it */
4839 current->thread.fpu.fpc = 0;
4841 /* Sync fmt2 only data */
4842 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4843 sync_regs_fmt2(vcpu);
4846 * In several places we have to modify our internal view to
4847 * not do things that are disallowed by the ultravisor. For
4848 * example we must not inject interrupts after specific exits
4849 * (e.g. 112 prefix page not secure). We do this by turning
4850 * off the machine check, external and I/O interrupt bits
4851 * of our PSW copy. To avoid getting validity intercepts, we
4852 * only accept the condition code from userspace.
4854 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4855 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4859 kvm_run->kvm_dirty_regs = 0;
4862 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4864 struct kvm_run *kvm_run = vcpu->run;
4866 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4867 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4868 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4869 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4870 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4871 if (MACHINE_HAS_GS) {
4873 __ctl_set_bit(2, 4);
4874 if (vcpu->arch.gs_enabled)
4875 save_gs_cb(current->thread.gs_cb);
4876 current->thread.gs_cb = vcpu->arch.host_gscb;
4877 restore_gs_cb(vcpu->arch.host_gscb);
4878 if (!vcpu->arch.host_gscb)
4879 __ctl_clear_bit(2, 4);
4880 vcpu->arch.host_gscb = NULL;
4883 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4886 static void store_regs(struct kvm_vcpu *vcpu)
4888 struct kvm_run *kvm_run = vcpu->run;
4890 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4891 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4892 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4893 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4894 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4895 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4896 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4897 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4898 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4899 save_access_regs(vcpu->run->s.regs.acrs);
4900 restore_access_regs(vcpu->arch.host_acrs);
4901 /* Save guest register state */
4903 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4904 /* Restore will be done lazily at return */
4905 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4906 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4907 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4908 store_regs_fmt2(vcpu);
4911 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4913 struct kvm_run *kvm_run = vcpu->run;
4917 * Running a VM while dumping always has the potential to
4918 * produce inconsistent dump data. But for PV vcpus a SIE
4919 * entry while dumping could also lead to a fatal validity
4920 * intercept which we absolutely want to avoid.
4922 if (vcpu->kvm->arch.pv.dumping)
4925 if (kvm_run->immediate_exit)
4928 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4929 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4934 if (guestdbg_exit_pending(vcpu)) {
4935 kvm_s390_prepare_debug_exit(vcpu);
4940 kvm_sigset_activate(vcpu);
4943 * no need to check the return value of vcpu_start as it can only return an
4944 * error for protvirt, but protvirt implies user cpu state control anyway
4946 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4947 kvm_s390_vcpu_start(vcpu);
4948 } else if (is_vcpu_stopped(vcpu)) {
4949 pr_err_ratelimited("can't run stopped vcpu %d\n",
4956 enable_cpu_timer_accounting(vcpu);
4959 rc = __vcpu_run(vcpu);
4961 if (signal_pending(current) && !rc) {
4962 kvm_run->exit_reason = KVM_EXIT_INTR;
4966 if (guestdbg_exit_pending(vcpu) && !rc) {
4967 kvm_s390_prepare_debug_exit(vcpu);
4971 if (rc == -EREMOTE) {
4972 /* userspace support is needed, kvm_run has been prepared */
4976 disable_cpu_timer_accounting(vcpu);
4979 kvm_sigset_deactivate(vcpu);
4981 vcpu->stat.exit_userspace++;
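/*
 * Illustrative sketch (not part of this file): the usual userspace run loop
 * that ends up in kvm_arch_vcpu_ioctl_run(). "vcpu_fd" and "run" (the
 * mmap'ed struct kvm_run of that vcpu) are assumed to exist already.
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
 *			if (errno == EINTR)
 *				continue;	// interrupted by a signal
 *			perror("KVM_RUN");
 *			break;
 *		}
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			// unhandled SIE intercept, see vcpu_post_run()
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */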
4988 * store status at address
4989 * we have two special cases:
4990 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4991 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4993 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4995 unsigned char archmode = 1;
4996 freg_t fprs[NUM_FPRS];
5001 px = kvm_s390_get_prefix(vcpu);
5002 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5003 if (write_guest_abs(vcpu, 163, &archmode, 1))
5006 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5007 if (write_guest_real(vcpu, 163, &archmode, 1))
5011 gpa -= __LC_FPREGS_SAVE_AREA;
5013 /* manually convert vector registers if necessary */
5014 if (MACHINE_HAS_VX) {
5015 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5016 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5019 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5020 vcpu->run->s.regs.fprs, 128);
5022 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5023 vcpu->run->s.regs.gprs, 128);
5024 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5025 &vcpu->arch.sie_block->gpsw, 16);
5026 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5028 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5029 &vcpu->run->s.regs.fpc, 4);
5030 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5031 &vcpu->arch.sie_block->todpr, 4);
5032 cputm = kvm_s390_get_cpu_timer(vcpu);
5033 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5035 clkcomp = vcpu->arch.sie_block->ckc >> 8;
5036 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5038 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5039 &vcpu->run->s.regs.acrs, 64);
5040 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5041 &vcpu->arch.sie_block->gcr, 128);
5042 return rc ? -EFAULT : 0;
5045 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
5048 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5049 * switch in the run ioctl. Let's update our copies before we save
5050 * them into the save area
5053 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
5054 save_access_regs(vcpu->run->s.regs.acrs);
5056 return kvm_s390_store_status_unloaded(vcpu, addr);
5059 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5061 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5062 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5065 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5068 struct kvm_vcpu *vcpu;
5070 kvm_for_each_vcpu(i, vcpu, kvm) {
5071 __disable_ibs_on_vcpu(vcpu);
5075 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5079 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5080 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5083 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5085 int i, online_vcpus, r = 0, started_vcpus = 0;
5087 if (!is_vcpu_stopped(vcpu))
5090 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5091 /* Only one cpu at a time may enter/leave the STOPPED state. */
5092 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5093 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5095 /* Let's tell the UV that we want to change into the operating state */
5096 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5097 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5099 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5104 for (i = 0; i < online_vcpus; i++) {
5105 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5109 if (started_vcpus == 0) {
5110 /* we're the only active VCPU -> speed it up */
5111 __enable_ibs_on_vcpu(vcpu);
5112 } else if (started_vcpus == 1) {
5114 * As we are starting a second VCPU, we have to disable
5115 * the IBS facility on all VCPUs to remove potentially
5116 * outstanding ENABLE requests.
5118 __disable_ibs_on_all_vcpus(vcpu->kvm);
5121 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5123 * The real PSW might have changed due to a RESTART interpreted by the
5124 * ultravisor. We block all interrupts and let the next sie exit refresh it.
5127 if (kvm_s390_pv_cpu_is_protected(vcpu))
5128 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5130 * Another VCPU might have used IBS while we were offline.
5131 * Let's play safe and flush the VCPU at startup.
5133 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5134 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5138 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5140 int i, online_vcpus, r = 0, started_vcpus = 0;
5141 struct kvm_vcpu *started_vcpu = NULL;
5143 if (is_vcpu_stopped(vcpu))
5146 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5147 /* Only one cpu at a time may enter/leave the STOPPED state. */
5148 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5149 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5151 /* Let's tell the UV that we want to change into the stopped state */
5152 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5153 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5155 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5161 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5162 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5163 * have been fully processed. This will ensure that the VCPU
5164 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5166 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5167 kvm_s390_clear_stop_irq(vcpu);
5169 __disable_ibs_on_vcpu(vcpu);
5171 for (i = 0; i < online_vcpus; i++) {
5172 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5174 if (!is_vcpu_stopped(tmp)) {
5180 if (started_vcpus == 1) {
5182 * As we only have one VCPU left, we want to enable the
5183 * IBS facility for that VCPU to speed it up.
5185 __enable_ibs_on_vcpu(started_vcpu);
5188 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5192 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5193 struct kvm_enable_cap *cap)
5201 case KVM_CAP_S390_CSS_SUPPORT:
5202 if (!vcpu->kvm->arch.css_support) {
5203 vcpu->kvm->arch.css_support = 1;
5204 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5205 trace_kvm_s390_enable_css(vcpu->kvm);
5216 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5217 struct kvm_s390_mem_op *mop)
5219 void __user *uaddr = (void __user *)mop->buf;
5223 if (mop->flags || !mop->size)
5225 if (mop->size + mop->sida_offset < mop->size)
5227 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5229 if (!kvm_s390_pv_cpu_is_protected(vcpu))
5232 sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
5235 case KVM_S390_MEMOP_SIDA_READ:
5236 if (copy_to_user(uaddr, sida_addr, mop->size))
5240 case KVM_S390_MEMOP_SIDA_WRITE:
5241 if (copy_from_user(sida_addr, uaddr, mop->size))
5248 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5249 struct kvm_s390_mem_op *mop)
5251 void __user *uaddr = (void __user *)mop->buf;
5252 void *tmpbuf = NULL;
5254 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5255 | KVM_S390_MEMOP_F_CHECK_ONLY
5256 | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5258 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5260 if (mop->size > MEM_OP_MAX_SIZE)
5262 if (kvm_s390_pv_cpu_is_protected(vcpu))
5264 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5265 if (access_key_invalid(mop->key))
5270 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5271 tmpbuf = vmalloc(mop->size);
5277 case KVM_S390_MEMOP_LOGICAL_READ:
5278 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5279 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5280 GACC_FETCH, mop->key);
5283 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5284 mop->size, mop->key);
5286 if (copy_to_user(uaddr, tmpbuf, mop->size))
5290 case KVM_S390_MEMOP_LOGICAL_WRITE:
5291 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5292 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5293 GACC_STORE, mop->key);
5296 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5300 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5301 mop->size, mop->key);
5305 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5306 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5312 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5313 struct kvm_s390_mem_op *mop)
5317 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5320 case KVM_S390_MEMOP_LOGICAL_READ:
5321 case KVM_S390_MEMOP_LOGICAL_WRITE:
5322 r = kvm_s390_vcpu_mem_op(vcpu, mop);
5324 case KVM_S390_MEMOP_SIDA_READ:
5325 case KVM_S390_MEMOP_SIDA_WRITE:
5326 /* we are locked against sida going away by the vcpu->mutex */
5327 r = kvm_s390_vcpu_sida_op(vcpu, mop);
5333 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
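/*
 * Illustrative sketch (not part of this file): a userspace logical read
 * through KVM_S390_MEM_OP, matching the checks in kvm_s390_vcpu_mem_op().
 * "vcpu_fd" is an assumed open vcpu file descriptor, "buf" a buffer of at
 * least "len" bytes with len <= MEM_OP_MAX_SIZE.
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */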
5337 long kvm_arch_vcpu_async_ioctl(struct file *filp,
5338 unsigned int ioctl, unsigned long arg)
5340 struct kvm_vcpu *vcpu = filp->private_data;
5341 void __user *argp = (void __user *)arg;
5344 case KVM_S390_IRQ: {
5345 struct kvm_s390_irq s390irq;
5347 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5349 return kvm_s390_inject_vcpu(vcpu, &s390irq);
5351 case KVM_S390_INTERRUPT: {
5352 struct kvm_s390_interrupt s390int;
5353 struct kvm_s390_irq s390irq = {};
5355 if (copy_from_user(&s390int, argp, sizeof(s390int)))
5357 if (s390int_to_s390irq(&s390int, &s390irq))
5359 return kvm_s390_inject_vcpu(vcpu, &s390irq);
5362 return -ENOIOCTLCMD;
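/*
 * Illustrative sketch (not part of this file): injecting an emergency signal
 * into a vcpu via the async KVM_S390_IRQ path above. "vcpu_fd" and
 * "src_cpu_addr" are assumed; a real VMM would pick them itself.
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = src_cpu_addr,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq) < 0)
 *		perror("KVM_S390_IRQ");
 */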
5365 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5366 struct kvm_pv_cmd *cmd)
5368 struct kvm_s390_pv_dmp dmp;
5372 /* Dump initialization is a prerequisite */
5373 if (!vcpu->kvm->arch.pv.dumping)
5376 if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5379 /* We only handle this subcmd right now */
5380 if (dmp.subcmd != KVM_PV_DUMP_CPU)
5383 /* The CPU dump length is the same as the storage donated at secure cpu creation. */
5384 if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5387 data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5391 ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5393 VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5394 vcpu->vcpu_id, cmd->rc, cmd->rrc);
5399 /* On success copy over the dump data */
5400 if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5407 long kvm_arch_vcpu_ioctl(struct file *filp,
5408 unsigned int ioctl, unsigned long arg)
5410 struct kvm_vcpu *vcpu = filp->private_data;
5411 void __user *argp = (void __user *)arg;
5419 case KVM_S390_STORE_STATUS:
5420 idx = srcu_read_lock(&vcpu->kvm->srcu);
5421 r = kvm_s390_store_status_unloaded(vcpu, arg);
5422 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5424 case KVM_S390_SET_INITIAL_PSW: {
5428 if (copy_from_user(&psw, argp, sizeof(psw)))
5430 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5433 case KVM_S390_CLEAR_RESET:
5435 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5436 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5437 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5438 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5439 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5443 case KVM_S390_INITIAL_RESET:
5445 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5446 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5447 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5448 UVC_CMD_CPU_RESET_INITIAL,
5450 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5454 case KVM_S390_NORMAL_RESET:
5456 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5457 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5458 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5459 UVC_CMD_CPU_RESET, &rc, &rrc);
5460 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5464 case KVM_SET_ONE_REG:
5465 case KVM_GET_ONE_REG: {
5466 struct kvm_one_reg reg;
5468 if (kvm_s390_pv_cpu_is_protected(vcpu))
5471 if (copy_from_user(&reg, argp, sizeof(reg)))
5473 if (ioctl == KVM_SET_ONE_REG)
5474 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5476 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5479 #ifdef CONFIG_KVM_S390_UCONTROL
5480 case KVM_S390_UCAS_MAP: {
5481 struct kvm_s390_ucas_mapping ucasmap;
5483 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5488 if (!kvm_is_ucontrol(vcpu->kvm)) {
5493 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5494 ucasmap.vcpu_addr, ucasmap.length);
5497 case KVM_S390_UCAS_UNMAP: {
5498 struct kvm_s390_ucas_mapping ucasmap;
5500 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5505 if (!kvm_is_ucontrol(vcpu->kvm)) {
5510 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5515 case KVM_S390_VCPU_FAULT: {
5516 r = gmap_fault(vcpu->arch.gmap, arg, 0);
5519 case KVM_ENABLE_CAP:
5521 struct kvm_enable_cap cap;
5523 if (copy_from_user(&cap, argp, sizeof(cap)))
5525 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5528 case KVM_S390_MEM_OP: {
5529 struct kvm_s390_mem_op mem_op;
5531 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5532 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5537 case KVM_S390_SET_IRQ_STATE: {
5538 struct kvm_s390_irq_state irq_state;
5541 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5543 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5544 irq_state.len == 0 ||
5545 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5549 /* do not use irq_state.flags, it will break old QEMUs */
5550 r = kvm_s390_set_irq_state(vcpu,
5551 (void __user *) irq_state.buf,
5555 case KVM_S390_GET_IRQ_STATE: {
5556 struct kvm_s390_irq_state irq_state;
5559 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5561 if (irq_state.len == 0) {
5565 /* do not use irq_state.flags, it will break old QEMUs */
5566 r = kvm_s390_get_irq_state(vcpu,
5567 (__u8 __user *) irq_state.buf,
5571 case KVM_S390_PV_CPU_COMMAND: {
5572 struct kvm_pv_cmd cmd;
5575 if (!is_prot_virt_host())
5579 if (copy_from_user(&cmd, argp, sizeof(cmd)))
5586 /* We only handle this cmd right now */
5587 if (cmd.cmd != KVM_PV_DUMP)
5590 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5592 /* Always copy over UV rc / rrc data */
5593 if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5594 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5606 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5608 #ifdef CONFIG_KVM_S390_UCONTROL
5609 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5610 && (kvm_is_ucontrol(vcpu->kvm))) {
5611 vmf->page = virt_to_page(vcpu->arch.sie_block);
5612 get_page(vmf->page);
5616 return VM_FAULT_SIGBUS;
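/*
 * Illustrative sketch (not part of this file): with CONFIG_KVM_S390_UCONTROL,
 * userspace can map the SIE control block of a vcpu by mmap'ing the vcpu fd
 * at KVM_S390_SIE_PAGE_OFFSET, which is what the fault handler above serves.
 * "vcpu_fd" and "page_size" (from sysconf(_SC_PAGESIZE)) are assumed.
 *
 *	void *sie = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * page_size);
 *	if (sie == MAP_FAILED)
 *		perror("mmap sie block");
 */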
5619 bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
5624 /* Section: memory related */
5625 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5626 const struct kvm_memory_slot *old,
5627 struct kvm_memory_slot *new,
5628 enum kvm_mr_change change)
5632 /* When we are protected, we should not change the memory slots */
5633 if (kvm_s390_pv_get_handle(kvm))
5636 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5639 /* A few sanity checks. Memory slots have to start and end at a segment
5640 boundary (1MB). The memory in userland may be fragmented into various
5641 different vmas. It is okay to mmap() and munmap() stuff in this slot
5642 after doing this call at any time */
5644 if (new->userspace_addr & 0xffffful)
5647 size = new->npages * PAGE_SIZE;
5648 if (size & 0xffffful)
5651 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
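/*
 * Illustrative sketch (not part of this file): a memslot that passes the
 * checks above. Both the userspace address and the size are 1 MB aligned;
 * "vm_fd" and "backing" (a 1 MB aligned anonymous mapping) are assumed.
 *
 *	struct kvm_userspace_memory_region reg = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256 << 20,	// 256 MB, segment aligned
 *		.userspace_addr  = (__u64)(unsigned long)backing,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &reg) < 0)
 *		perror("KVM_SET_USER_MEMORY_REGION");
 */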
5657 void kvm_arch_commit_memory_region(struct kvm *kvm,
5658 struct kvm_memory_slot *old,
5659 const struct kvm_memory_slot *new,
5660 enum kvm_mr_change change)
5666 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5667 old->npages * PAGE_SIZE);
5670 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5671 old->npages * PAGE_SIZE);
5676 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5677 new->base_gfn * PAGE_SIZE,
5678 new->npages * PAGE_SIZE);
5680 case KVM_MR_FLAGS_ONLY:
5683 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5686 pr_warn("failed to commit memory region\n");
5690 static inline unsigned long nonhyp_mask(int i)
5692 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5694 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
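/*
 * Illustrative worked example (not part of this file) of nonhyp_mask(): the
 * two hmfai bits for facility word i are shifted down to bits 1:0, and each
 * resulting value 0..3 masks off another 16 leading bits of that word:
 *
 *	nonhyp_fai == 0  ->  0x0000ffffffffffff  (top 16 bits masked off)
 *	nonhyp_fai == 1  ->  0x00000000ffffffff  (top 32 bits masked off)
 *	nonhyp_fai == 2  ->  0x000000000000ffff  (top 48 bits masked off)
 *	nonhyp_fai == 3  ->  0x0000000000000000  (everything masked off)
 */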
5697 static int __init kvm_s390_init(void)
5701 if (!sclp.has_sief2) {
5702 pr_info("SIE is not available\n");
5706 if (nested && hpage) {
5707 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5711 for (i = 0; i < 16; i++)
5712 kvm_s390_fac_base[i] |=
5713 stfle_fac_list[i] & nonhyp_mask(i);
5715 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5718 static void __exit kvm_s390_exit(void)
5723 module_init(kvm_s390_init);
5724 module_exit(kvm_s390_exit);
5727 * Enable autoloading of the kvm module.
5728 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5729 * since x86 takes a different approach.
5731 #include <linux/miscdevice.h>
5732 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5733 MODULE_ALIAS("devname:kvm");