// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <[email protected]>
 *               Christian Borntraeger <[email protected]>
 *               Christian Ehrhardt <[email protected]>
 *               Jason J. Herne <[email protected]>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
#include <linux/mmu_notifier.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
        KVM_GENERIC_VM_STATS(),
        STATS_DESC_COUNTER(VM, inject_io),
        STATS_DESC_COUNTER(VM, inject_float_mchk),
        STATS_DESC_COUNTER(VM, inject_pfault_done),
        STATS_DESC_COUNTER(VM, inject_service_signal),
        STATS_DESC_COUNTER(VM, inject_virtio),
        STATS_DESC_COUNTER(VM, aen_forward)
};

const struct kvm_stats_header kvm_vm_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
        KVM_GENERIC_VCPU_STATS(),
        STATS_DESC_COUNTER(VCPU, exit_userspace),
        STATS_DESC_COUNTER(VCPU, exit_null),
        STATS_DESC_COUNTER(VCPU, exit_external_request),
        STATS_DESC_COUNTER(VCPU, exit_io_request),
        STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
        STATS_DESC_COUNTER(VCPU, exit_stop_request),
        STATS_DESC_COUNTER(VCPU, exit_validity),
        STATS_DESC_COUNTER(VCPU, exit_instruction),
        STATS_DESC_COUNTER(VCPU, exit_pei),
        STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
        STATS_DESC_COUNTER(VCPU, instruction_lctl),
        STATS_DESC_COUNTER(VCPU, instruction_lctlg),
        STATS_DESC_COUNTER(VCPU, instruction_stctl),
        STATS_DESC_COUNTER(VCPU, instruction_stctg),
        STATS_DESC_COUNTER(VCPU, exit_program_interruption),
        STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
        STATS_DESC_COUNTER(VCPU, exit_operation_exception),
        STATS_DESC_COUNTER(VCPU, deliver_ckc),
        STATS_DESC_COUNTER(VCPU, deliver_cputm),
        STATS_DESC_COUNTER(VCPU, deliver_external_call),
        STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
        STATS_DESC_COUNTER(VCPU, deliver_service_signal),
        STATS_DESC_COUNTER(VCPU, deliver_virtio),
        STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
        STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
        STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
        STATS_DESC_COUNTER(VCPU, deliver_program),
        STATS_DESC_COUNTER(VCPU, deliver_io),
        STATS_DESC_COUNTER(VCPU, deliver_machine_check),
        STATS_DESC_COUNTER(VCPU, exit_wait_state),
        STATS_DESC_COUNTER(VCPU, inject_ckc),
        STATS_DESC_COUNTER(VCPU, inject_cputm),
        STATS_DESC_COUNTER(VCPU, inject_external_call),
        STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
        STATS_DESC_COUNTER(VCPU, inject_mchk),
        STATS_DESC_COUNTER(VCPU, inject_pfault_init),
        STATS_DESC_COUNTER(VCPU, inject_program),
        STATS_DESC_COUNTER(VCPU, inject_restart),
        STATS_DESC_COUNTER(VCPU, inject_set_prefix),
        STATS_DESC_COUNTER(VCPU, inject_stop_signal),
        STATS_DESC_COUNTER(VCPU, instruction_epsw),
        STATS_DESC_COUNTER(VCPU, instruction_gs),
        STATS_DESC_COUNTER(VCPU, instruction_io_other),
        STATS_DESC_COUNTER(VCPU, instruction_lpsw),
        STATS_DESC_COUNTER(VCPU, instruction_lpswe),
        STATS_DESC_COUNTER(VCPU, instruction_pfmf),
        STATS_DESC_COUNTER(VCPU, instruction_ptff),
        STATS_DESC_COUNTER(VCPU, instruction_sck),
        STATS_DESC_COUNTER(VCPU, instruction_sckpf),
        STATS_DESC_COUNTER(VCPU, instruction_stidp),
        STATS_DESC_COUNTER(VCPU, instruction_spx),
        STATS_DESC_COUNTER(VCPU, instruction_stpx),
        STATS_DESC_COUNTER(VCPU, instruction_stap),
        STATS_DESC_COUNTER(VCPU, instruction_iske),
        STATS_DESC_COUNTER(VCPU, instruction_ri),
        STATS_DESC_COUNTER(VCPU, instruction_rrbe),
        STATS_DESC_COUNTER(VCPU, instruction_sske),
        STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
        STATS_DESC_COUNTER(VCPU, instruction_stsi),
        STATS_DESC_COUNTER(VCPU, instruction_stfl),
        STATS_DESC_COUNTER(VCPU, instruction_tb),
        STATS_DESC_COUNTER(VCPU, instruction_tpi),
        STATS_DESC_COUNTER(VCPU, instruction_tprot),
        STATS_DESC_COUNTER(VCPU, instruction_tsch),
        STATS_DESC_COUNTER(VCPU, instruction_sie),
        STATS_DESC_COUNTER(VCPU, instruction_essa),
        STATS_DESC_COUNTER(VCPU, instruction_sthyi),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
        STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
        STATS_DESC_COUNTER(VCPU, diag_9c_forward),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
        STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vcpu_stats_desc),
};
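
/*
 * Layout sketch (illustrative) of the binary stats blob exposed through the
 * KVM_GET_STATS_FD interface, using the offsets computed above:
 *
 *   0            ... struct kvm_stats_header
 *   id_offset    ... id string, KVM_STATS_NAME_SIZE bytes
 *   desc_offset  ... the _kvm_stats_desc array declared above
 *   data_offset  ... one u64 counter value per descriptor, same order
 */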

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * allow asynchronous deinit for protected guests; enable by default since
 * the feature is opt-in anyway
 */
static int async_destroy = 1;
module_param(async_destroy, int, 0444);
MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
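
/*
 * Illustrative usage (an assumption about the deployment, not part of this
 * file): the parameters above can be given at load time, e.g.
 * "modprobe kvm nested=1 hpage=1", and the 0644 ones can be changed at
 * runtime via /sys/module/kvm/parameters/, e.g.
 * "echo 50 > /sys/module/kvm/parameters/halt_poll_max_steal".
 */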

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                sizeof(stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
        return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta, we have to compensate this by adding
         * -delta to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}
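
/*
 * Worked example (for illustration): epoch and epdx together act as a
 * multi-precision signed value, with epdx holding the high byte when
 * ECD_MEF is set. delta_idx = -1 (0xff) sign-extends a negative 64-bit
 * delta into the high byte, and because the 64-bit addition above is
 * unsigned, a wraparound is detected by "scb->epoch < delta" and
 * compensated by carrying 1 into scb->epdx.
 */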

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        unsigned long i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
        unsigned long function = (unsigned long)nr | 0x100;
        int cc;

        asm volatile(
                "       lgr     0,%[function]\n"
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : [function] "d" (function)
                : "cc", "0");
        return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
        asm volatile(
                "       lghi    0,0\n"
                "       lgr     1,%[query]\n"
                /* Parameter registers are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
                :
                : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
                : "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (test_facility(155)) /* MSA9 */
                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kdsa);

        if (test_facility(150)) /* SORTL */
                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

        if (test_facility(151)) /* DFLTCC */
                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages being detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        int rc = -ENOMEM;

        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf_uv)
                goto out;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
            debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
                goto out;

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
        if (rc) {
                pr_err("A FLIC registration call failed with rc=%d\n", rc);
                goto out;
        }

        if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
                rc = kvm_s390_pci_init();
                if (rc) {
                        pr_err("Unable to allocate AIFT for PCI\n");
                        goto out;
                }
        }

        rc = kvm_s390_gib_init(GAL_ISC);
        if (rc)
                goto out;

        return 0;

out:
        kvm_arch_exit();
        return rc;
}

void kvm_arch_exit(void)
{
        kvm_s390_gib_destroy();
        if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
                kvm_s390_pci_exit();
        debug_unregister(kvm_s390_dbf);
        debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}
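
/*
 * Hypothetical userspace call (a sketch, not part of this file):
 * KVM_S390_ENABLE_SIE is issued on the /dev/kvm device fd, e.g.
 *
 *   int kvm_fd = open("/dev/kvm", O_RDWR);
 *   int rc = ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0);
 *   // rc == 0: the calling process' address space is now SIE capable
 */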

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
        case KVM_CAP_SET_GUEST_DEBUG:
        case KVM_CAP_S390_DIAG318:
        case KVM_CAP_S390_MEM_OP_EXTENSION:
                r = 1;
                break;
        case KVM_CAP_SET_GUEST_DEBUG2:
                r = KVM_GUESTDBG_VALID_MASK;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
        case KVM_CAP_MAX_VCPU_ID:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                if (ext == KVM_CAP_NR_VCPUS)
                        r = min_t(unsigned int, num_online_cpus(), r);
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
                r = async_destroy && is_prot_virt_host();
                break;
        case KVM_CAP_S390_PROTECTED:
                r = is_prot_virt_host();
                break;
        case KVM_CAP_S390_PROTECTED_DUMP: {
                u64 pv_cmds_dump[] = {
                        BIT_UVC_CMD_DUMP_INIT,
                        BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
                        BIT_UVC_CMD_DUMP_CPU,
                        BIT_UVC_CMD_DUMP_COMPLETE,
                };
                int i;

                r = is_prot_virt_host();

                for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
                        if (!test_bit_inv(pv_cmds_dump[i],
                                          (unsigned long *)&uv_info.inst_calls_list)) {
                                r = 0;
                                break;
                        }
                }
                break;
        }
        case KVM_CAP_S390_ZPCI_OP:
                r = kvm_s390_pci_interp_allowed();
                break;
        case KVM_CAP_S390_CPU_TOPOLOGY:
                r = test_facility(11);
                break;
        default:
                r = 0;
        }
        return r;
}
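
/*
 * Illustrative userspace query (a sketch): the capabilities reported above
 * are probed with KVM_CHECK_EXTENSION, on the VM fd where supported, e.g.
 *
 *   int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *   // max == MEM_OP_MAX_SIZE (65536) when the mem op ioctl is available
 */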

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned long i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        if (test_facility(148)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
                                set_kvm_facility(kvm->arch.model.fac_list, 148);
                        }
                        if (test_facility(152)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
                                set_kvm_facility(kvm->arch.model.fac_list, 152);
                        }
                        if (test_facility(192)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 192);
                                set_kvm_facility(kvm->arch.model.fac_list, 192);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;
                        mmap_write_lock(kvm->mm);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        mmap_write_unlock(kvm->mm);
                        /*
                         * We might have to create fake 4k page
                         * tables. To avoid that the hardware works on
                         * stale PGSTEs, we emulate these instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        case KVM_CAP_S390_CPU_TOPOLOGY:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(11)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 11);
                        set_kvm_facility(kvm->arch.model.fac_list, 11);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
                         r ? "(not available)" : "(success)");
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* Not compatible with cmma. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        unsigned long i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                /* recreate the shadow crycb by leaving the VSIE handler */
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 0;
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
{
        /* Only set the ECB bits after guest requests zPCI interpretation */
        if (!vcpu->kvm->arch.use_zpci_interp)
                return;

        vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
        vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
}

void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        unsigned long i;

        lockdep_assert_held(&kvm->lock);

        if (!kvm_s390_pci_interp_allowed())
                return;

        /*
         * If host is configured for PCI and the necessary facilities are
         * available, turn on interpretation for the life of this guest
         */
        kvm->arch.use_zpci_interp = 1;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_pci_setup(vcpu);
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        unsigned long cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int bkt;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || kvm_memslots_empty(slots))
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        kvm_for_each_memslot(ms, bkt, slots) {
                if (!ms->dirty_bitmap)
                        return -EINVAL;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}
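
/*
 * Illustrative userspace toggle (a sketch): migration mode is flipped via
 * the KVM_S390_VM_MIGRATION attribute group on the VM fd, e.g.
 *
 *   struct kvm_device_attr attr = {
 *           .group = KVM_S390_VM_MIGRATION,
 *           .attr  = KVM_S390_VM_MIGRATION_START,
 *   };
 *   ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */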

static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        __kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        __kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        mutex_lock(&kvm->lock);
        /*
         * For protected guests, the TOD is managed by the ultravisor, so trying
         * to change it will never bring the expected results.
         */
        if (kvm_s390_pv_is_protected(kvm)) {
                ret = -EOPNOTSUPP;
                goto out_unlock;
        }

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

out_unlock:
        mutex_unlock(&kvm->lock);
        return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        union tod_clock clk;

        preempt_disable();

        store_tod_clock_ext(&clk);

        gtod->tod = clk.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = clk.ei + kvm->arch.epdx;
                if (gtod->tod < clk.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
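
/*
 * Illustrative userspace read of the guest TOD (a sketch):
 *
 *   __u64 tod;
 *   struct kvm_device_attr attr = {
 *           .group = KVM_S390_VM_TOD,
 *           .attr  = KVM_S390_VM_TOD_LOW,
 *           .addr  = (__u64)&tod,
 *   };
 *   ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */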

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                mutex_unlock(&kvm->lock);
                return -EBUSY;
        }
        bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
        mutex_unlock(&kvm->lock);
        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                mutex_unlock(&kvm->lock);
                return -EBUSY;
        }

        if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
                           sizeof(struct kvm_s390_vm_cpu_subfunc))) {
                mutex_unlock(&kvm->lock);
                return -EFAULT;
        }
        mutex_unlock(&kvm->lock);

        VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
        VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
        VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1492         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1493                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1494                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1495         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1496                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1497                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1498         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1499                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1500                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1501         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1502                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1503                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1504         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1505                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1506                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1507         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1510         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1511                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1513         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1514                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1516         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1519         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1522         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1527         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1532
1533         return 0;
1534 }
1535
1536 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1537 {
1538         int ret = -ENXIO;
1539
1540         switch (attr->attr) {
1541         case KVM_S390_VM_CPU_PROCESSOR:
1542                 ret = kvm_s390_set_processor(kvm, attr);
1543                 break;
1544         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1545                 ret = kvm_s390_set_processor_feat(kvm, attr);
1546                 break;
1547         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1548                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1549                 break;
1550         }
1551         return ret;
1552 }
1553
1554 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1555 {
1556         struct kvm_s390_vm_cpu_processor *proc;
1557         int ret = 0;
1558
1559         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1560         if (!proc) {
1561                 ret = -ENOMEM;
1562                 goto out;
1563         }
1564         proc->cpuid = kvm->arch.model.cpuid;
1565         proc->ibc = kvm->arch.model.ibc;
1566         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1567                S390_ARCH_FAC_LIST_SIZE_BYTE);
1568         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1569                  kvm->arch.model.ibc,
1570                  kvm->arch.model.cpuid);
1571         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1572                  kvm->arch.model.fac_list[0],
1573                  kvm->arch.model.fac_list[1],
1574                  kvm->arch.model.fac_list[2]);
1575         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1576                 ret = -EFAULT;
1577         kfree(proc);
1578 out:
1579         return ret;
1580 }
1581
1582 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1583 {
1584         struct kvm_s390_vm_cpu_machine *mach;
1585         int ret = 0;
1586
1587         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1588         if (!mach) {
1589                 ret = -ENOMEM;
1590                 goto out;
1591         }
1592         get_cpu_id((struct cpuid *) &mach->cpuid);
1593         mach->ibc = sclp.ibc;
1594         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1595                S390_ARCH_FAC_LIST_SIZE_BYTE);
1596         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1597                sizeof(stfle_fac_list));
1598         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1599                  mach->ibc,
1600                  mach->cpuid);
1601         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1602                  mach->fac_mask[0],
1603                  mach->fac_mask[1],
1604                  mach->fac_mask[2]);
1605         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1606                  mach->fac_list[0],
1607                  mach->fac_list[1],
1608                  mach->fac_list[2]);
1609         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1610                 ret = -EFAULT;
1611         kfree(mach);
1612 out:
1613         return ret;
1614 }
1615
1616 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1617                                        struct kvm_device_attr *attr)
1618 {
1619         struct kvm_s390_vm_cpu_feat data;
1620
1621         bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1622         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1623                 return -EFAULT;
1624         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1625                          data.feat[0],
1626                          data.feat[1],
1627                          data.feat[2]);
1628         return 0;
1629 }
1630
1631 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1632                                      struct kvm_device_attr *attr)
1633 {
1634         struct kvm_s390_vm_cpu_feat data;
1635
1636         bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1637         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1638                 return -EFAULT;
1639         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1640                          data.feat[0],
1641                          data.feat[1],
1642                          data.feat[2]);
1643         return 0;
1644 }
1645
1646 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1647                                           struct kvm_device_attr *attr)
1648 {
1649         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1650             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1651                 return -EFAULT;
1652
1653         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1654                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1655                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1656                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1657                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1658         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1659                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1660                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1661         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1662                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1663                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1664         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1665                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1666                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1667         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1668                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1669                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1670         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1671                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1672                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1673         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1674                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1675                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1676         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1677                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1678                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1679         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1680                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1681                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1682         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1683                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1684                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1685         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1686                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1687                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1688         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1689                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1690                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1691         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1692                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1693                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1694         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1695                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1696                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1697         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1698                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1699                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1700         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1701                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1702                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1703                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1704                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1705         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1706                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1707                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1708                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1709                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1710
1711         return 0;
1712 }
1713
1714 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1715                                         struct kvm_device_attr *attr)
1716 {
1717         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1718             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1719                 return -EFAULT;
1720
1721         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1722                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1723                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1724                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1725                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1726         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1727                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1728                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1729         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1730                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1731                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1732         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1733                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1734                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1735         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1736                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1737                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1738         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1739                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1740                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1741         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1742                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1743                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1744         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1745                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1746                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1747         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1748                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1749                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1750         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1751                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1752                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1753         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1754                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1755                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1756         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1757                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1758                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1759         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1760                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1761                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1762         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1763                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1764                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1765         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1766                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1767                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1768         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1769                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1770                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1771                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1772                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1773         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1774                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1775                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1776                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1777                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1778
1779         return 0;
1780 }
1781
1782 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1783 {
1784         int ret = -ENXIO;
1785
1786         switch (attr->attr) {
1787         case KVM_S390_VM_CPU_PROCESSOR:
1788                 ret = kvm_s390_get_processor(kvm, attr);
1789                 break;
1790         case KVM_S390_VM_CPU_MACHINE:
1791                 ret = kvm_s390_get_machine(kvm, attr);
1792                 break;
1793         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1794                 ret = kvm_s390_get_processor_feat(kvm, attr);
1795                 break;
1796         case KVM_S390_VM_CPU_MACHINE_FEAT:
1797                 ret = kvm_s390_get_machine_feat(kvm, attr);
1798                 break;
1799         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1800                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1801                 break;
1802         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1803                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1804                 break;
1805         }
1806         return ret;
1807 }
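/*
 * Illustrative sketch (not part of this file): the canonical user-space
 * pattern probes an attribute with KVM_HAS_DEVICE_ATTR before reading it.
 * "vm_fd" and use() are placeholders.
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)&mach,
 *	};
 *
 *	if (!ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) &&
 *	    !ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
 *		use(mach.ibc, mach.fac_mask, mach.fac_list);
 */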
1808
1809 /**
1810  * kvm_s390_update_topology_change_report - update CPU topology change report
1811  * @kvm: guest KVM description
1812  * @val: set or clear the MTCR bit
1813  *
1814  * Updates the Multiprocessor Topology-Change-Report bit to signal
1815  * a topology change to the guest.
1816  * This is only relevant if the topology facility is present.
1817  *
1818  * The SCA version, bsca or esca, doesn't matter as the offset is the same.
1819  */
1820 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1821 {
1822         union sca_utility new, old;
1823         struct bsca_block *sca;
1824
1825         read_lock(&kvm->arch.sca_lock);
1826         sca = kvm->arch.sca;
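        /*
         * The read lock only pins the sca pointer against a concurrent
         * bsca -> esca switch; the cmpxchg loop makes the read-modify-write
         * of the utility word atomic with respect to other updaters.
         */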
1827         do {
1828                 old = READ_ONCE(sca->utility);
1829                 new = old;
1830                 new.mtcr = val;
1831         } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1832         read_unlock(&kvm->arch.sca_lock);
1833 }
1834
1835 static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1836                                                struct kvm_device_attr *attr)
1837 {
1838         if (!test_kvm_facility(kvm, 11))
1839                 return -ENXIO;
1840
1841         kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1842         return 0;
1843 }
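/*
 * Illustrative sketch (not part of this file): note that for the set case
 * the MTCR value travels in attr.attr itself rather than behind attr.addr.
 * "vm_fd" is a placeholder.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_TOPOLOGY,
 *		.attr  = 1,	// any non-zero value sets the MTCR bit
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */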
1844
1845 static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1846                                                struct kvm_device_attr *attr)
1847 {
1848         u8 topo;
1849
1850         if (!test_kvm_facility(kvm, 11))
1851                 return -ENXIO;
1852
1853         read_lock(&kvm->arch.sca_lock);
1854         topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1855         read_unlock(&kvm->arch.sca_lock);
1856
1857         return put_user(topo, (u8 __user *)attr->addr);
1858 }
1859
1860 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1861 {
1862         int ret;
1863
1864         switch (attr->group) {
1865         case KVM_S390_VM_MEM_CTRL:
1866                 ret = kvm_s390_set_mem_control(kvm, attr);
1867                 break;
1868         case KVM_S390_VM_TOD:
1869                 ret = kvm_s390_set_tod(kvm, attr);
1870                 break;
1871         case KVM_S390_VM_CPU_MODEL:
1872                 ret = kvm_s390_set_cpu_model(kvm, attr);
1873                 break;
1874         case KVM_S390_VM_CRYPTO:
1875                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1876                 break;
1877         case KVM_S390_VM_MIGRATION:
1878                 ret = kvm_s390_vm_set_migration(kvm, attr);
1879                 break;
1880         case KVM_S390_VM_CPU_TOPOLOGY:
1881                 ret = kvm_s390_set_topo_change_indication(kvm, attr);
1882                 break;
1883         default:
1884                 ret = -ENXIO;
1885                 break;
1886         }
1887
1888         return ret;
1889 }
1890
1891 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1892 {
1893         int ret;
1894
1895         switch (attr->group) {
1896         case KVM_S390_VM_MEM_CTRL:
1897                 ret = kvm_s390_get_mem_control(kvm, attr);
1898                 break;
1899         case KVM_S390_VM_TOD:
1900                 ret = kvm_s390_get_tod(kvm, attr);
1901                 break;
1902         case KVM_S390_VM_CPU_MODEL:
1903                 ret = kvm_s390_get_cpu_model(kvm, attr);
1904                 break;
1905         case KVM_S390_VM_MIGRATION:
1906                 ret = kvm_s390_vm_get_migration(kvm, attr);
1907                 break;
1908         case KVM_S390_VM_CPU_TOPOLOGY:
1909                 ret = kvm_s390_get_topo_change_indication(kvm, attr);
1910                 break;
1911         default:
1912                 ret = -ENXIO;
1913                 break;
1914         }
1915
1916         return ret;
1917 }
1918
1919 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1920 {
1921         int ret;
1922
1923         switch (attr->group) {
1924         case KVM_S390_VM_MEM_CTRL:
1925                 switch (attr->attr) {
1926                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1927                 case KVM_S390_VM_MEM_CLR_CMMA:
1928                         ret = sclp.has_cmma ? 0 : -ENXIO;
1929                         break;
1930                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1931                         ret = 0;
1932                         break;
1933                 default:
1934                         ret = -ENXIO;
1935                         break;
1936                 }
1937                 break;
1938         case KVM_S390_VM_TOD:
1939                 switch (attr->attr) {
1940                 case KVM_S390_VM_TOD_LOW:
1941                 case KVM_S390_VM_TOD_HIGH:
1942                         ret = 0;
1943                         break;
1944                 default:
1945                         ret = -ENXIO;
1946                         break;
1947                 }
1948                 break;
1949         case KVM_S390_VM_CPU_MODEL:
1950                 switch (attr->attr) {
1951                 case KVM_S390_VM_CPU_PROCESSOR:
1952                 case KVM_S390_VM_CPU_MACHINE:
1953                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1954                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1955                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1956                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1957                         ret = 0;
1958                         break;
1959                 default:
1960                         ret = -ENXIO;
1961                         break;
1962                 }
1963                 break;
1964         case KVM_S390_VM_CRYPTO:
1965                 switch (attr->attr) {
1966                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1967                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1968                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1969                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1970                         ret = 0;
1971                         break;
1972                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1973                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1974                         ret = ap_instructions_available() ? 0 : -ENXIO;
1975                         break;
1976                 default:
1977                         ret = -ENXIO;
1978                         break;
1979                 }
1980                 break;
1981         case KVM_S390_VM_MIGRATION:
1982                 ret = 0;
1983                 break;
1984         case KVM_S390_VM_CPU_TOPOLOGY:
1985                 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1986                 break;
1987         default:
1988                 ret = -ENXIO;
1989                 break;
1990         }
1991
1992         return ret;
1993 }
1994
1995 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1996 {
1997         uint8_t *keys;
1998         uint64_t hva;
1999         int srcu_idx, i, r = 0;
2000
2001         if (args->flags != 0)
2002                 return -EINVAL;
2003
2004         /* Is this guest using storage keys? */
2005         if (!mm_uses_skeys(current->mm))
2006                 return KVM_S390_GET_SKEYS_NONE;
2007
2008         /* Enforce sane limit on memory allocation */
2009         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2010                 return -EINVAL;
2011
2012         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2013         if (!keys)
2014                 return -ENOMEM;
2015
2016         mmap_read_lock(current->mm);
2017         srcu_idx = srcu_read_lock(&kvm->srcu);
2018         for (i = 0; i < args->count; i++) {
2019                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2020                 if (kvm_is_error_hva(hva)) {
2021                         r = -EFAULT;
2022                         break;
2023                 }
2024
2025                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
2026                 if (r)
2027                         break;
2028         }
2029         srcu_read_unlock(&kvm->srcu, srcu_idx);
2030         mmap_read_unlock(current->mm);
2031
2032         if (!r) {
2033                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2034                                  sizeof(uint8_t) * args->count);
2035                 if (r)
2036                         r = -EFAULT;
2037         }
2038
2039         kvfree(keys);
2040         return r;
2041 }
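/*
 * Illustrative sketch (not part of this file): user space drives the getter
 * above through the KVM_S390_GET_SKEYS VM ioctl. "vm_fd" is a placeholder;
 * the buffer must hold at least count bytes.
 *
 *	uint8_t buf[128];
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn     = 0,
 *		.count         = sizeof(buf),
 *		.skeydata_addr = (__u64)buf,
 *	};
 *	long r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 *	// r == KVM_S390_GET_SKEYS_NONE means the guest uses no storage keys
 */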
2042
2043 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2044 {
2045         uint8_t *keys;
2046         uint64_t hva;
2047         int srcu_idx, i, r = 0;
2048         bool unlocked;
2049
2050         if (args->flags != 0)
2051                 return -EINVAL;
2052
2053         /* Enforce sane limit on memory allocation */
2054         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2055                 return -EINVAL;
2056
2057         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2058         if (!keys)
2059                 return -ENOMEM;
2060
2061         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2062                            sizeof(uint8_t) * args->count);
2063         if (r) {
2064                 r = -EFAULT;
2065                 goto out;
2066         }
2067
2068         /* Enable storage key handling for the guest */
2069         r = s390_enable_skey();
2070         if (r)
2071                 goto out;
2072
2073         i = 0;
2074         mmap_read_lock(current->mm);
2075         srcu_idx = srcu_read_lock(&kvm->srcu);
2076         while (i < args->count) {
2077                 unlocked = false;
2078                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2079                 if (kvm_is_error_hva(hva)) {
2080                         r = -EFAULT;
2081                         break;
2082                 }
2083
2084                 /* Lowest order bit is reserved */
2085                 if (keys[i] & 0x01) {
2086                         r = -EINVAL;
2087                         break;
2088                 }
2089
2090                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2091                 if (r) {
2092                         r = fixup_user_fault(current->mm, hva,
2093                                              FAULT_FLAG_WRITE, &unlocked);
2094                         if (r)
2095                                 break;
2096                 }
2097                 if (!r)
2098                         i++;
2099         }
2100         srcu_read_unlock(&kvm->srcu, srcu_idx);
2101         mmap_read_unlock(current->mm);
2102 out:
2103         kvfree(keys);
2104         return r;
2105 }
2106
2107 /*
2108  * Each block is preceded by its base address and length, which together
2109  * cost two longs, so it is cheaper to keep sending clean data than to
2110  * start a new block, as long as the clean run is shorter than two longs.
2111  */
2112 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2113 /* Same upper bound as for storage keys, for consistency */
2114 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
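/*
 * Worked example, assuming a 64-bit host: KVM_S390_MAX_BIT_DISTANCE is 16.
 * Ending a block and starting a new one costs a fresh base address and
 * length (two longs, 16 bytes), so a run of up to 16 clean one-byte values
 * is cheaper to transmit inline than to open a new block.
 */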
2115
2116 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2117                               u8 *res, unsigned long bufsize)
2118 {
2119         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2120
2121         args->count = 0;
2122         while (args->count < bufsize) {
2123                 hva = gfn_to_hva(kvm, cur_gfn);
2124                 /*
2125                  * We return an error if the first value was invalid, but we
2126                  * return successfully if at least one value was copied.
2127                  */
2128                 if (kvm_is_error_hva(hva))
2129                         return args->count ? 0 : -EFAULT;
2130                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2131                         pgstev = 0;
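                /*
                 * After the shift, mask 0x43 keeps exactly the bits that
                 * are migrated: 0x03 is the CMMA usage state and 0x40 the
                 * NODAT bit (cf. the mask in kvm_s390_set_cmma_bits()).
                 */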
2132                 res[args->count++] = (pgstev >> 24) & 0x43;
2133                 cur_gfn++;
2134         }
2135
2136         return 0;
2137 }
2138
2139 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2140                                                      gfn_t gfn)
2141 {
2142         return ____gfn_to_memslot(slots, gfn, true);
2143 }
2144
2145 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2146                                               unsigned long cur_gfn)
2147 {
2148         struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2149         unsigned long ofs = cur_gfn - ms->base_gfn;
2150         struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2151
2152         if (ms->base_gfn + ms->npages <= cur_gfn) {
2153                 mnode = rb_next(mnode);
2154                 /* If we are above the highest slot, wrap around */
2155                 if (!mnode)
2156                         mnode = rb_first(&slots->gfn_tree);
2157
2158                 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2159                 ofs = 0;
2160         }
2161         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2162         while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2163                 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2164                 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2165         }
2166         return ms->base_gfn + ofs;
2167 }
2168
2169 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2170                              u8 *res, unsigned long bufsize)
2171 {
2172         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2173         struct kvm_memslots *slots = kvm_memslots(kvm);
2174         struct kvm_memory_slot *ms;
2175
2176         if (unlikely(kvm_memslots_empty(slots)))
2177                 return 0;
2178
2179         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2180         ms = gfn_to_memslot(kvm, cur_gfn);
2181         args->count = 0;
2182         args->start_gfn = cur_gfn;
2183         if (!ms)
2184                 return 0;
2185         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2186         mem_end = kvm_s390_get_gfn_end(slots);
2187
2188         while (args->count < bufsize) {
2189                 hva = gfn_to_hva(kvm, cur_gfn);
2190                 if (kvm_is_error_hva(hva))
2191                         return 0;
2192                 /* Decrement only if we actually flipped the bit to 0 */
2193                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2194                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2195                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2196                         pgstev = 0;
2197                 /* Save the value */
2198                 res[args->count++] = (pgstev >> 24) & 0x43;
2199                 /* If the next bit is too far away, stop. */
2200                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2201                         return 0;
2202                 /* If we reached the previous "next", find the next one */
2203                 if (cur_gfn == next_gfn)
2204                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2205                 /* Reached the end of memory or of the buffer, stop */
2206                 if ((next_gfn >= mem_end) ||
2207                     (next_gfn - args->start_gfn >= bufsize))
2208                         return 0;
2209                 cur_gfn++;
2210                 /* Reached the end of the current memslot, take the next one. */
2211                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2212                         ms = gfn_to_memslot(kvm, cur_gfn);
2213                         if (!ms)
2214                                 return 0;
2215                 }
2216         }
2217         return 0;
2218 }
2219
2220 /*
2221  * This function searches for the next page with dirty CMMA attributes, and
2222  * saves the attributes in the buffer up to either the end of the buffer or
2223  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2224  * no trailing clean bytes are saved.
2225  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2226  * output buffer will indicate 0 as length.
2227  */
2228 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2229                                   struct kvm_s390_cmma_log *args)
2230 {
2231         unsigned long bufsize;
2232         int srcu_idx, peek, ret;
2233         u8 *values;
2234
2235         if (!kvm->arch.use_cmma)
2236                 return -ENXIO;
2237         /* Invalid/unsupported flags were specified */
2238         if (args->flags & ~KVM_S390_CMMA_PEEK)
2239                 return -EINVAL;
2240         /* Migration mode query, and we are not doing a migration */
2241         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2242         if (!peek && !kvm->arch.migration_mode)
2243                 return -EINVAL;
2244         /* CMMA is disabled or was not used, or the buffer has length zero */
2245         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2246         if (!bufsize || !kvm->mm->context.uses_cmm) {
2247                 memset(args, 0, sizeof(*args));
2248                 return 0;
2249         }
2250         /* We are not peeking, and there are no dirty pages */
2251         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2252                 memset(args, 0, sizeof(*args));
2253                 return 0;
2254         }
2255
2256         values = vmalloc(bufsize);
2257         if (!values)
2258                 return -ENOMEM;
2259
2260         mmap_read_lock(kvm->mm);
2261         srcu_idx = srcu_read_lock(&kvm->srcu);
2262         if (peek)
2263                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2264         else
2265                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2266         srcu_read_unlock(&kvm->srcu, srcu_idx);
2267         mmap_read_unlock(kvm->mm);
2268
2269         if (kvm->arch.migration_mode)
2270                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2271         else
2272                 args->remaining = 0;
2273
2274         if (copy_to_user((void __user *)args->values, values, args->count))
2275                 ret = -EFAULT;
2276
2277         vfree(values);
2278         return ret;
2279 }
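/*
 * Illustrative sketch (not part of this file): during migration user space
 * typically drains the dirty CMMA values in a loop. "vm_fd" is a
 * placeholder; on return, start_gfn points at the first value written.
 *
 *	uint8_t buf[512];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = sizeof(buf),
 *		.flags     = 0,		// no KVM_S390_CMMA_PEEK: consume dirty bits
 *		.values    = (__u64)buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log))
 *			break;
 *		// transmit log.count values for gfns starting at log.start_gfn
 *		log.start_gfn += log.count;
 *		log.count = sizeof(buf);
 *	} while (log.remaining);
 */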
2280
2281 /*
2282  * This function sets the CMMA attributes for the given pages. If the input
2283  * buffer has zero length, no action is taken, otherwise the attributes are
2284  * set and the mm->context.uses_cmm flag is set.
2285  */
2286 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2287                                   const struct kvm_s390_cmma_log *args)
2288 {
2289         unsigned long hva, mask, pgstev, i;
2290         uint8_t *bits;
2291         int srcu_idx, r = 0;
2292
2293         mask = args->mask;
2294
2295         if (!kvm->arch.use_cmma)
2296                 return -ENXIO;
2297         /* invalid/unsupported flags */
2298         if (args->flags != 0)
2299                 return -EINVAL;
2300         /* Enforce sane limit on memory allocation */
2301         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2302                 return -EINVAL;
2303         /* Nothing to do */
2304         if (args->count == 0)
2305                 return 0;
2306
2307         bits = vmalloc(array_size(sizeof(*bits), args->count));
2308         if (!bits)
2309                 return -ENOMEM;
2310
2311         r = copy_from_user(bits, (void __user *)args->values, args->count);
2312         if (r) {
2313                 r = -EFAULT;
2314                 goto out;
2315         }
2316
2317         mmap_read_lock(kvm->mm);
2318         srcu_idx = srcu_read_lock(&kvm->srcu);
2319         for (i = 0; i < args->count; i++) {
2320                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2321                 if (kvm_is_error_hva(hva)) {
2322                         r = -EFAULT;
2323                         break;
2324                 }
2325
2326                 pgstev = bits[i];
2327                 pgstev = pgstev << 24;
2328                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2329                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2330         }
2331         srcu_read_unlock(&kvm->srcu, srcu_idx);
2332         mmap_read_unlock(kvm->mm);
2333
2334         if (!kvm->mm->context.uses_cmm) {
2335                 mmap_write_lock(kvm->mm);
2336                 kvm->mm->context.uses_cmm = 1;
2337                 mmap_write_unlock(kvm->mm);
2338         }
2339 out:
2340         vfree(bits);
2341         return r;
2342 }
2343
2344 /**
2345  * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM
2346  * to non-protected.
2347  * @kvm: the VM whose protected vCPUs are to be converted
2348  * @rc: return value for the RC field of the UVC (in case of error)
2349  * @rrc: return value for the RRC field of the UVC (in case of error)
2350  *
2351  * Does not stop in case of error; it tries to convert as many
2352  * CPUs as possible. In case of error, the RC and RRC of the first
2353  * error are returned.
2354  *
2355  * Return: 0 in case of success, otherwise -EIO
2356  */
2357 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2358 {
2359         struct kvm_vcpu *vcpu;
2360         unsigned long i;
2361         u16 _rc, _rrc;
2362         int ret = 0;
2363
2364         /*
2365          * We ignore failures and try to destroy as many CPUs as possible.
2366          * At the same time we must not free the assigned resources when
2367  * this fails, as the ultravisor still has access to that memory.
2368          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2369          * behind.
2370          * We want to return the first failure rc and rrc, though.
2371          */
2372         kvm_for_each_vcpu(i, vcpu, kvm) {
2373                 mutex_lock(&vcpu->mutex);
2374                 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2375                         *rc = _rc;
2376                         *rrc = _rrc;
2377                         ret = -EIO;
2378                 }
2379                 mutex_unlock(&vcpu->mutex);
2380         }
2381         /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2382         if (use_gisa)
2383                 kvm_s390_gisa_enable(kvm);
2384         return ret;
2385 }
2386
2387 /**
2388  * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2389  * to protected.
2390  * @kvm: the VM whose protected vCPUs are to be converted
2391  * @rc: return value for the RC field of the UVC (in case of error)
2392  * @rrc: return value for the RRC field of the UVC (in case of error)
2393  *
2394  * Tries to undo the conversion in case of error.
2395  *
2396  * Return: 0 in case of success, otherwise -EIO
2397  */
2398 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2399 {
2400         unsigned long i;
2401         int r = 0;
2402         u16 dummy;
2403
2404         struct kvm_vcpu *vcpu;
2405
2406         /* Disable the GISA if the ultravisor does not support AIV. */
2407         if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2408                 kvm_s390_gisa_disable(kvm);
2409
2410         kvm_for_each_vcpu(i, vcpu, kvm) {
2411                 mutex_lock(&vcpu->mutex);
2412                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2413                 mutex_unlock(&vcpu->mutex);
2414                 if (r)
2415                         break;
2416         }
2417         if (r)
2418                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2419         return r;
2420 }
2421
2422 /*
2423  * Here we provide user space with a direct interface to query
2424  * UV-related data like UV maxima and available features as well as
2425  * feature-specific data.
2426  *
2427  * To facilitate future extension of the data structures we'll try to
2428  * write data up to the maximum requested length.
2429  */
2430 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2431 {
2432         ssize_t len_min;
2433
2434         switch (info->header.id) {
2435         case KVM_PV_INFO_VM: {
2436                 len_min =  sizeof(info->header) + sizeof(info->vm);
2437
2438                 if (info->header.len_max < len_min)
2439                         return -EINVAL;
2440
2441                 memcpy(info->vm.inst_calls_list,
2442                        uv_info.inst_calls_list,
2443                        sizeof(uv_info.inst_calls_list));
2444
2445                 /* max_guest_cpu_id is the highest cpu id, not a count, hence the +1 */
2446                 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2447                 info->vm.max_guests = uv_info.max_num_sec_conf;
2448                 info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2449                 info->vm.feature_indication = uv_info.uv_feature_indications;
2450
2451                 return len_min;
2452         }
2453         case KVM_PV_INFO_DUMP: {
2454                 len_min =  sizeof(info->header) + sizeof(info->dump);
2455
2456                 if (info->header.len_max < len_min)
2457                         return -EINVAL;
2458
2459                 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2460                 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2461                 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2462                 return len_min;
2463         }
2464         default:
2465                 return -EINVAL;
2466         }
2467 }
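/*
 * Illustrative sketch (not part of this file): user space advertises its
 * buffer size in len_max and learns how much was filled in via
 * len_written. "vm_fd" is a placeholder.
 *
 *	struct kvm_s390_pv_info info = {
 *		.header.id      = KVM_PV_INFO_VM,
 *		.header.len_max = sizeof(info),
 *	};
 *	struct kvm_pv_cmd cmd = {
 *		.cmd  = KVM_PV_INFO,
 *		.data = (__u64)&info,
 *	};
 *
 *	if (!ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd))
 *		printf("max_guests=%llu\n", info.vm.max_guests);
 */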
2468
2469 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2470                            struct kvm_s390_pv_dmp dmp)
2471 {
2472         int r = -EINVAL;
2473         void __user *result_buff = (void __user *)dmp.buff_addr;
2474
2475         switch (dmp.subcmd) {
2476         case KVM_PV_DUMP_INIT: {
2477                 if (kvm->arch.pv.dumping)
2478                         break;
2479
2480                 /*
2481                  * Block SIE entry as concurrent dump UVCs could lead
2482                  * to validity intercepts.
2483                  */
2484                 kvm_s390_vcpu_block_all(kvm);
2485
2486                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2487                                   UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2488                 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2489                              cmd->rc, cmd->rrc);
2490                 if (!r) {
2491                         kvm->arch.pv.dumping = true;
2492                 } else {
2493                         kvm_s390_vcpu_unblock_all(kvm);
2494                         r = -EINVAL;
2495                 }
2496                 break;
2497         }
2498         case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2499                 if (!kvm->arch.pv.dumping)
2500                         break;
2501
2502                 /*
2503                  * gaddr is an output parameter since we might stop
2504                  * early. As dmp will be copied back in our caller, we
2505                  * don't need to do it ourselves.
2506                  */
2507                 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2508                                                 &cmd->rc, &cmd->rrc);
2509                 break;
2510         }
2511         case KVM_PV_DUMP_COMPLETE: {
2512                 if (!kvm->arch.pv.dumping)
2513                         break;
2514
2515                 r = -EINVAL;
2516                 if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2517                         break;
2518
2519                 r = kvm_s390_pv_dump_complete(kvm, result_buff,
2520                                               &cmd->rc, &cmd->rrc);
2521                 break;
2522         }
2523         default:
2524                 r = -ENOTTY;
2525                 break;
2526         }
2527
2528         return r;
2529 }
2530
2531 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2532 {
2533         const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2534         void __user *argp = (void __user *)cmd->data;
2535         int r = 0;
2536         u16 dummy;
2537
2538         if (need_lock)
2539                 mutex_lock(&kvm->lock);
2540
2541         switch (cmd->cmd) {
2542         case KVM_PV_ENABLE: {
2543                 r = -EINVAL;
2544                 if (kvm_s390_pv_is_protected(kvm))
2545                         break;
2546
2547                 /*
2548                  * FMT 4 SIE needs esca. As we never switch back to bsca from
2549                  * esca, we need no cleanup in the error cases below.
2550                  */
2551                 r = sca_switch_to_extended(kvm);
2552                 if (r)
2553                         break;
2554
2555                 mmap_write_lock(current->mm);
2556                 r = gmap_mark_unmergeable();
2557                 mmap_write_unlock(current->mm);
2558                 if (r)
2559                         break;
2560
2561                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2562                 if (r)
2563                         break;
2564
2565                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2566                 if (r)
2567                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2568
2569                 /* we need to block service interrupts from now on */
2570                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2571                 break;
2572         }
2573         case KVM_PV_ASYNC_CLEANUP_PREPARE:
2574                 r = -EINVAL;
2575                 if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2576                         break;
2577
2578                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2579                 /*
2580                  * If a CPU could not be destroyed, destroying the VM will also
2581                  * fail. There is no point in trying; instead return the rc and
2582                  * rrc from the first CPU that failed destroying.
2583                  */
2584                 if (r)
2585                         break;
2586                 r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2587
2588                 /* no need to block service interrupts any more */
2589                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2590                 break;
2591         case KVM_PV_ASYNC_CLEANUP_PERFORM:
2592                 r = -EINVAL;
2593                 if (!async_destroy)
2594                         break;
2595                 /* kvm->lock must not be held; this is asserted inside the function. */
2596                 r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2597                 break;
2598         case KVM_PV_DISABLE: {
2599                 r = -EINVAL;
2600                 if (!kvm_s390_pv_is_protected(kvm))
2601                         break;
2602
2603                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2604                 /*
2605                  * If a CPU could not be destroyed, destroying the VM will also
2606                  * fail. There is no point in trying; instead return the rc and
2607                  * rrc from the first CPU that failed destroying.
2608                  */
2609                 if (r)
2610                         break;
2611                 r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2612
2613                 /* no need to block service interrupts any more */
2614                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2615                 break;
2616         }
2617         case KVM_PV_SET_SEC_PARMS: {
2618                 struct kvm_s390_pv_sec_parm parms = {};
2619                 void *hdr;
2620
2621                 r = -EINVAL;
2622                 if (!kvm_s390_pv_is_protected(kvm))
2623                         break;
2624
2625                 r = -EFAULT;
2626                 if (copy_from_user(&parms, argp, sizeof(parms)))
2627                         break;
2628
2629                 /* Currently restricted to 8KB */
2630                 r = -EINVAL;
2631                 if (parms.length > PAGE_SIZE * 2)
2632                         break;
2633
2634                 r = -ENOMEM;
2635                 hdr = vmalloc(parms.length);
2636                 if (!hdr)
2637                         break;
2638
2639                 r = -EFAULT;
2640                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2641                                     parms.length))
2642                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2643                                                       &cmd->rc, &cmd->rrc);
2644
2645                 vfree(hdr);
2646                 break;
2647         }
2648         case KVM_PV_UNPACK: {
2649                 struct kvm_s390_pv_unp unp = {};
2650
2651                 r = -EINVAL;
2652                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2653                         break;
2654
2655                 r = -EFAULT;
2656                 if (copy_from_user(&unp, argp, sizeof(unp)))
2657                         break;
2658
2659                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2660                                        &cmd->rc, &cmd->rrc);
2661                 break;
2662         }
2663         case KVM_PV_VERIFY: {
2664                 r = -EINVAL;
2665                 if (!kvm_s390_pv_is_protected(kvm))
2666                         break;
2667
2668                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2669                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2670                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2671                              cmd->rrc);
2672                 break;
2673         }
2674         case KVM_PV_PREP_RESET: {
2675                 r = -EINVAL;
2676                 if (!kvm_s390_pv_is_protected(kvm))
2677                         break;
2678
2679                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2680                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2681                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2682                              cmd->rc, cmd->rrc);
2683                 break;
2684         }
2685         case KVM_PV_UNSHARE_ALL: {
2686                 r = -EINVAL;
2687                 if (!kvm_s390_pv_is_protected(kvm))
2688                         break;
2689
2690                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2691                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2692                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2693                              cmd->rc, cmd->rrc);
2694                 break;
2695         }
2696         case KVM_PV_INFO: {
2697                 struct kvm_s390_pv_info info = {};
2698                 ssize_t data_len;
2699
2700                 /*
2701                  * No need to check the VM protection here.
2702                  *
2703                  * User space may want to query some of the data while the
2704                  * VM is still unprotected. Should a new info subcommand
2705                  * ever need to be fenced, the info handler can still
2706                  * return an error for it.
2707                  */
2708
2709                 r = -EFAULT;
2710                 if (copy_from_user(&info, argp, sizeof(info.header)))
2711                         break;
2712
2713                 r = -EINVAL;
2714                 if (info.header.len_max < sizeof(info.header))
2715                         break;
2716
2717                 data_len = kvm_s390_handle_pv_info(&info);
2718                 if (data_len < 0) {
2719                         r = data_len;
2720                         break;
2721                 }
2722                 /*
2723                  * If a data command struct is extended (multiple
2724                  * times) this can be used to determine how much of it
2725                  * is valid.
2726                  */
2727                 info.header.len_written = data_len;
2728
2729                 r = -EFAULT;
2730                 if (copy_to_user(argp, &info, data_len))
2731                         break;
2732
2733                 r = 0;
2734                 break;
2735         }
2736         case KVM_PV_DUMP: {
2737                 struct kvm_s390_pv_dmp dmp;
2738
2739                 r = -EINVAL;
2740                 if (!kvm_s390_pv_is_protected(kvm))
2741                         break;
2742
2743                 r = -EFAULT;
2744                 if (copy_from_user(&dmp, argp, sizeof(dmp)))
2745                         break;
2746
2747                 r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2748                 if (r)
2749                         break;
2750
2751                 if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2752                         r = -EFAULT;
2753                         break;
2754                 }
2755
2756                 break;
2757         }
2758         default:
2759                 r = -ENOTTY;
2760         }
2761         if (need_lock)
2762                 mutex_unlock(&kvm->lock);
2763
2764         return r;
2765 }
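
/*
 * Illustrative user-space sketch (not part of this file): the handler above
 * is reached through the KVM_S390_PV_COMMAND ioctl on the VM file
 * descriptor. A minimal caller enabling protected mode might look like
 * this ("vm_fd" is an assumption, error handling trimmed):
 *
 *      #include <string.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kvm.h>
 *
 *      static int pv_enable(int vm_fd)
 *      {
 *              struct kvm_pv_cmd cmd;
 *
 *              memset(&cmd, 0, sizeof(cmd));
 *              cmd.cmd = KVM_PV_ENABLE;        // flags must stay 0
 *              if (ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd))
 *                      return -1;              // cmd.rc/rrc carry UV codes
 *              return 0;
 *      }
 */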
2766
2767 static bool access_key_invalid(u8 access_key)
2768 {
2769         return access_key > 0xf;
2770 }
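
/*
 * Background note (added for clarity): storage access keys on s390 are
 * 4-bit quantities, values 0-15, matching the ACC field of a storage key
 * and the PSW key. Anything above 0xf can therefore never name a valid
 * key, which is all the helper above checks.
 */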
2771
2772 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2773 {
2774         void __user *uaddr = (void __user *)mop->buf;
2775         u64 supported_flags;
2776         void *tmpbuf = NULL;
2777         int r, srcu_idx;
2778
2779         supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2780                           | KVM_S390_MEMOP_F_CHECK_ONLY;
2781         if (mop->flags & ~supported_flags || !mop->size)
2782                 return -EINVAL;
2783         if (mop->size > MEM_OP_MAX_SIZE)
2784                 return -E2BIG;
2785         /*
2786          * This is technically a heuristic only: since kvm->lock is not
2787          * taken, it is not guaranteed that the VM is/remains non-protected.
2788          * That is fine from a kernel perspective; wrongdoing is detected
2789          * on the access, -EFAULT is returned, and the VM may crash the
2790          * next time it accesses the memory in question.
2791          * There is no sane use case for doing a protection switch and a
2792          * memop on two different CPUs at the same time.
2793          */
2794         if (kvm_s390_pv_get_handle(kvm))
2795                 return -EINVAL;
2796         if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2797                 if (access_key_invalid(mop->key))
2798                         return -EINVAL;
2799         } else {
2800                 mop->key = 0;
2801         }
2802         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2803                 tmpbuf = vmalloc(mop->size);
2804                 if (!tmpbuf)
2805                         return -ENOMEM;
2806         }
2807
2808         srcu_idx = srcu_read_lock(&kvm->srcu);
2809
2810         if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2811                 r = PGM_ADDRESSING;
2812                 goto out_unlock;
2813         }
2814
2815         switch (mop->op) {
2816         case KVM_S390_MEMOP_ABSOLUTE_READ: {
2817                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2818                         r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2819                 } else {
2820                         r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2821                                                       mop->size, GACC_FETCH, mop->key);
2822                         if (r == 0) {
2823                                 if (copy_to_user(uaddr, tmpbuf, mop->size))
2824                                         r = -EFAULT;
2825                         }
2826                 }
2827                 break;
2828         }
2829         case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2830                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2831                         r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2832                 } else {
2833                         if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2834                                 r = -EFAULT;
2835                                 break;
2836                         }
2837                         r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2838                                                       mop->size, GACC_STORE, mop->key);
2839                 }
2840                 break;
2841         }
2842         default:
2843                 r = -EINVAL;
2844         }
2845
2846 out_unlock:
2847         srcu_read_unlock(&kvm->srcu, srcu_idx);
2848
2849         vfree(tmpbuf);
2850         return r;
2851 }
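
/*
 * Illustrative user-space sketch (not part of this file): reading guest
 * absolute memory through KVM_S390_MEM_OP on the VM fd, which ends up in
 * the function above. Only valid while the VM is not protected, see the
 * heuristic comment there. "vm_fd" is an assumption:
 *
 *      static int read_guest_abs(int vm_fd, __u64 gaddr, void *buf, __u32 len)
 *      {
 *              struct kvm_s390_mem_op op;
 *
 *              memset(&op, 0, sizeof(op));
 *              op.gaddr = gaddr;
 *              op.size  = len;         // at most MEM_OP_MAX_SIZE
 *              op.op    = KVM_S390_MEMOP_ABSOLUTE_READ;
 *              op.buf   = (__u64)(unsigned long)buf;
 *              return ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 *      }
 */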
2852
2853 long kvm_arch_vm_ioctl(struct file *filp,
2854                        unsigned int ioctl, unsigned long arg)
2855 {
2856         struct kvm *kvm = filp->private_data;
2857         void __user *argp = (void __user *)arg;
2858         struct kvm_device_attr attr;
2859         int r;
2860
2861         switch (ioctl) {
2862         case KVM_S390_INTERRUPT: {
2863                 struct kvm_s390_interrupt s390int;
2864
2865                 r = -EFAULT;
2866                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2867                         break;
2868                 r = kvm_s390_inject_vm(kvm, &s390int);
2869                 break;
2870         }
2871         case KVM_CREATE_IRQCHIP: {
2872                 struct kvm_irq_routing_entry routing;
2873
2874                 r = -EINVAL;
2875                 if (kvm->arch.use_irqchip) {
2876                         /* Set up dummy routing. */
2877                         memset(&routing, 0, sizeof(routing));
2878                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2879                 }
2880                 break;
2881         }
2882         case KVM_SET_DEVICE_ATTR: {
2883                 r = -EFAULT;
2884                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2885                         break;
2886                 r = kvm_s390_vm_set_attr(kvm, &attr);
2887                 break;
2888         }
2889         case KVM_GET_DEVICE_ATTR: {
2890                 r = -EFAULT;
2891                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2892                         break;
2893                 r = kvm_s390_vm_get_attr(kvm, &attr);
2894                 break;
2895         }
2896         case KVM_HAS_DEVICE_ATTR: {
2897                 r = -EFAULT;
2898                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2899                         break;
2900                 r = kvm_s390_vm_has_attr(kvm, &attr);
2901                 break;
2902         }
2903         case KVM_S390_GET_SKEYS: {
2904                 struct kvm_s390_skeys args;
2905
2906                 r = -EFAULT;
2907                 if (copy_from_user(&args, argp,
2908                                    sizeof(struct kvm_s390_skeys)))
2909                         break;
2910                 r = kvm_s390_get_skeys(kvm, &args);
2911                 break;
2912         }
2913         case KVM_S390_SET_SKEYS: {
2914                 struct kvm_s390_skeys args;
2915
2916                 r = -EFAULT;
2917                 if (copy_from_user(&args, argp,
2918                                    sizeof(struct kvm_s390_skeys)))
2919                         break;
2920                 r = kvm_s390_set_skeys(kvm, &args);
2921                 break;
2922         }
2923         case KVM_S390_GET_CMMA_BITS: {
2924                 struct kvm_s390_cmma_log args;
2925
2926                 r = -EFAULT;
2927                 if (copy_from_user(&args, argp, sizeof(args)))
2928                         break;
2929                 mutex_lock(&kvm->slots_lock);
2930                 r = kvm_s390_get_cmma_bits(kvm, &args);
2931                 mutex_unlock(&kvm->slots_lock);
2932                 if (!r) {
2933                         r = copy_to_user(argp, &args, sizeof(args));
2934                         if (r)
2935                                 r = -EFAULT;
2936                 }
2937                 break;
2938         }
2939         case KVM_S390_SET_CMMA_BITS: {
2940                 struct kvm_s390_cmma_log args;
2941
2942                 r = -EFAULT;
2943                 if (copy_from_user(&args, argp, sizeof(args)))
2944                         break;
2945                 mutex_lock(&kvm->slots_lock);
2946                 r = kvm_s390_set_cmma_bits(kvm, &args);
2947                 mutex_unlock(&kvm->slots_lock);
2948                 break;
2949         }
2950         case KVM_S390_PV_COMMAND: {
2951                 struct kvm_pv_cmd args;
2952
2953                 /* protvirt means user cpu state */
2954                 kvm_s390_set_user_cpu_state_ctrl(kvm);
2955                 r = 0;
2956                 if (!is_prot_virt_host()) {
2957                         r = -EINVAL;
2958                         break;
2959                 }
2960                 if (copy_from_user(&args, argp, sizeof(args))) {
2961                         r = -EFAULT;
2962                         break;
2963                 }
2964                 if (args.flags) {
2965                         r = -EINVAL;
2966                         break;
2967                 }
2968                 /* must be called without kvm->lock */
2969                 r = kvm_s390_handle_pv(kvm, &args);
2970                 if (copy_to_user(argp, &args, sizeof(args))) {
2971                         r = -EFAULT;
2972                         break;
2973                 }
2974                 break;
2975         }
2976         case KVM_S390_MEM_OP: {
2977                 struct kvm_s390_mem_op mem_op;
2978
2979                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2980                         r = kvm_s390_vm_mem_op(kvm, &mem_op);
2981                 else
2982                         r = -EFAULT;
2983                 break;
2984         }
2985         case KVM_S390_ZPCI_OP: {
2986                 struct kvm_s390_zpci_op args;
2987
2988                 r = -EINVAL;
2989                 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2990                         break;
2991                 if (copy_from_user(&args, argp, sizeof(args))) {
2992                         r = -EFAULT;
2993                         break;
2994                 }
2995                 r = kvm_s390_pci_zpci_op(kvm, &args);
2996                 break;
2997         }
2998         default:
2999                 r = -ENOTTY;
3000         }
3001
3002         return r;
3003 }
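
/*
 * Illustrative user-space sketch (not part of this file): the three
 * device-attr cases above all share struct kvm_device_attr, so an
 * attribute can be probed before it is set, e.g. for CMMA ("vm_fd" is an
 * assumption):
 *
 *      struct kvm_device_attr attr = {
 *              .group = KVM_S390_VM_MEM_CTRL,
 *              .attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *      };
 *
 *      if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *              ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */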
3004
3005 static int kvm_s390_apxa_installed(void)
3006 {
3007         struct ap_config_info info;
3008
3009         if (ap_instructions_available()) {
3010                 if (ap_qci(&info) == 0)
3011                         return info.apxa;
3012         }
3013
3014         return 0;
3015 }
3016
3017 /*
3018  * The format of the crypto control block (CRYCB) is specified in the 3 low
3019  * order bits of the CRYCB designation (CRYCBD) field as follows:
3020  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3021  *           AP extended addressing (APXA) facility is installed.
3022  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
3023  * Format 2: Both the APXA and MSAX3 facilities are installed.
3024  */
3025 static void kvm_s390_set_crycb_format(struct kvm *kvm)
3026 {
3027         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
3028
3029         /* Clear the CRYCB format bits - i.e., set format 0 by default */
3030         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3031
3032         /* Check whether MSAX3 is installed */
3033         if (!test_kvm_facility(kvm, 76))
3034                 return;
3035
3036         if (kvm_s390_apxa_installed())
3037                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3038         else
3039                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3040 }
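
/*
 * Illustrative note (added for clarity): the CRYCB designation built above
 * is just the CRYCB origin with the format encoded in its low-order bits,
 * so a consumer can take it apart again along these lines (a sketch, not
 * code used by this file):
 *
 *      u32 crycbd  = kvm->arch.crypto.crycbd;
 *      int fmt     = crycbd & CRYCB_FORMAT_MASK;
 *      void *crycb = (void *)(unsigned long)(crycbd & ~CRYCB_FORMAT_MASK);
 */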
3041
3042 /*
3043  * kvm_arch_crypto_set_masks
3044  *
3045  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3046  *       to be set.
3047  * @apm: the mask identifying the accessible AP adapters
3048  * @aqm: the mask identifying the accessible AP domains
3049  * @adm: the mask identifying the accessible AP control domains
3050  *
3051  * Set the masks that identify the adapters, domains and control domains to
3052  * which the KVM guest is granted access.
3053  *
3054  * Note: The kvm->lock mutex must be locked by the caller before invoking this
3055  *       function.
3056  */
3057 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3058                                unsigned long *aqm, unsigned long *adm)
3059 {
3060         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3061
3062         kvm_s390_vcpu_block_all(kvm);
3063
3064         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3065         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
3066                 memcpy(crycb->apcb1.apm, apm, 32);
3067                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3068                          apm[0], apm[1], apm[2], apm[3]);
3069                 memcpy(crycb->apcb1.aqm, aqm, 32);
3070                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3071                          aqm[0], aqm[1], aqm[2], aqm[3]);
3072                 memcpy(crycb->apcb1.adm, adm, 32);
3073                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3074                          adm[0], adm[1], adm[2], adm[3]);
3075                 break;
3076         case CRYCB_FORMAT1:
3077         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
3078                 memcpy(crycb->apcb0.apm, apm, 8);
3079                 memcpy(crycb->apcb0.aqm, aqm, 2);
3080                 memcpy(crycb->apcb0.adm, adm, 2);
3081                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3082                          apm[0], *((unsigned short *)aqm),
3083                          *((unsigned short *)adm));
3084                 break;
3085         default:        /* Cannot happen */
3086                 break;
3087         }
3088
3089         /* recreate the shadow crycb for each vcpu */
3090         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3091         kvm_s390_vcpu_unblock_all(kvm);
3092 }
3093 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
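
/*
 * Illustrative sketch (not part of this file) of a caller in the style of
 * the vfio_ap driver: grant the guest AP adapter 0 with (control) domain 6,
 * holding kvm->lock as the comment above requires. The masks use MSB-0 bit
 * numbering, hence set_bit_inv():
 *
 *      DECLARE_BITMAP(apm, 256) = { 0 };
 *      DECLARE_BITMAP(aqm, 256) = { 0 };
 *      DECLARE_BITMAP(adm, 256) = { 0 };
 *
 *      set_bit_inv(0, apm);
 *      set_bit_inv(6, aqm);
 *      set_bit_inv(6, adm);
 *
 *      mutex_lock(&kvm->lock);
 *      kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 *      mutex_unlock(&kvm->lock);
 */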
3094
3095 /*
3096  * kvm_arch_crypto_clear_masks
3097  *
3098  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3099  *       to be cleared.
3100  *
3101  * Clear the masks that identify the adapters, domains and control domains to
3102  * which the KVM guest is granted access.
3103  *
3104  * Note: The kvm->lock mutex must be locked by the caller before invoking this
3105  *       function.
3106  */
3107 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3108 {
3109         kvm_s390_vcpu_block_all(kvm);
3110
3111         memset(&kvm->arch.crypto.crycb->apcb0, 0,
3112                sizeof(kvm->arch.crypto.crycb->apcb0));
3113         memset(&kvm->arch.crypto.crycb->apcb1, 0,
3114                sizeof(kvm->arch.crypto.crycb->apcb1));
3115
3116         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3117         /* recreate the shadow crycb for each vcpu */
3118         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3119         kvm_s390_vcpu_unblock_all(kvm);
3120 }
3121 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3122
3123 static u64 kvm_s390_get_initial_cpuid(void)
3124 {
3125         struct cpuid cpuid;
3126
3127         get_cpu_id(&cpuid);
3128         cpuid.version = 0xff;
3129         return *((u64 *) &cpuid);
3130 }
3131
3132 static void kvm_s390_crypto_init(struct kvm *kvm)
3133 {
3134         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3135         kvm_s390_set_crycb_format(kvm);
3136         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3137
3138         if (!test_kvm_facility(kvm, 76))
3139                 return;
3140
3141         /* Enable AES/DEA protected key functions by default */
3142         kvm->arch.crypto.aes_kw = 1;
3143         kvm->arch.crypto.dea_kw = 1;
3144         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3145                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3146         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3147                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3148 }
3149
3150 static void sca_dispose(struct kvm *kvm)
3151 {
3152         if (kvm->arch.use_esca)
3153                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3154         else
3155                 free_page((unsigned long)(kvm->arch.sca));
3156         kvm->arch.sca = NULL;
3157 }
3158
3159 void kvm_arch_free_vm(struct kvm *kvm)
3160 {
3161         if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3162                 kvm_s390_pci_clear_list(kvm);
3163
3164         __kvm_arch_free_vm(kvm);
3165 }
3166
3167 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3168 {
3169         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3170         int i, rc;
3171         char debug_name[16];
3172         static unsigned long sca_offset;
3173
3174         rc = -EINVAL;
3175 #ifdef CONFIG_KVM_S390_UCONTROL
3176         if (type & ~KVM_VM_S390_UCONTROL)
3177                 goto out_err;
3178         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3179                 goto out_err;
3180 #else
3181         if (type)
3182                 goto out_err;
3183 #endif
3184
3185         rc = s390_enable_sie();
3186         if (rc)
3187                 goto out_err;
3188
3189         rc = -ENOMEM;
3190
3191         if (!sclp.has_64bscao)
3192                 alloc_flags |= GFP_DMA;
3193         rwlock_init(&kvm->arch.sca_lock);
3194         /* start with basic SCA */
3195         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3196         if (!kvm->arch.sca)
3197                 goto out_err;
3198         mutex_lock(&kvm_lock);
3199         sca_offset += 16;
3200         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3201                 sca_offset = 0;
3202         kvm->arch.sca = (struct bsca_block *)
3203                         ((char *) kvm->arch.sca + sca_offset);
3204         mutex_unlock(&kvm_lock);
3205
3206         sprintf(debug_name, "kvm-%u", current->pid);
3207
3208         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3209         if (!kvm->arch.dbf)
3210                 goto out_err;
3211
3212         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3213         kvm->arch.sie_page2 =
3214              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3215         if (!kvm->arch.sie_page2)
3216                 goto out_err;
3217
3218         kvm->arch.sie_page2->kvm = kvm;
3219         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3220
3221         for (i = 0; i < kvm_s390_fac_size(); i++) {
3222                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3223                                               (kvm_s390_fac_base[i] |
3224                                                kvm_s390_fac_ext[i]);
3225                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3226                                               kvm_s390_fac_base[i];
3227         }
3228         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3229
3230         /* we are always in czam mode - even on pre-z14 machines */
3231         set_kvm_facility(kvm->arch.model.fac_mask, 138);
3232         set_kvm_facility(kvm->arch.model.fac_list, 138);
3233         /* we emulate STHYI in kvm */
3234         set_kvm_facility(kvm->arch.model.fac_mask, 74);
3235         set_kvm_facility(kvm->arch.model.fac_list, 74);
3236         if (MACHINE_HAS_TLB_GUEST) {
3237                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
3238                 set_kvm_facility(kvm->arch.model.fac_list, 147);
3239         }
3240
3241         if (css_general_characteristics.aiv && test_facility(65))
3242                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
3243
3244         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3245         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3246
3247         kvm_s390_crypto_init(kvm);
3248
3249         if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3250                 mutex_lock(&kvm->lock);
3251                 kvm_s390_pci_init_list(kvm);
3252                 kvm_s390_vcpu_pci_enable_interp(kvm);
3253                 mutex_unlock(&kvm->lock);
3254         }
3255
3256         mutex_init(&kvm->arch.float_int.ais_lock);
3257         spin_lock_init(&kvm->arch.float_int.lock);
3258         for (i = 0; i < FIRQ_LIST_COUNT; i++)
3259                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3260         init_waitqueue_head(&kvm->arch.ipte_wq);
3261         mutex_init(&kvm->arch.ipte_mutex);
3262
3263         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3264         VM_EVENT(kvm, 3, "vm created with type %lu", type);
3265
3266         if (type & KVM_VM_S390_UCONTROL) {
3267                 kvm->arch.gmap = NULL;
3268                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3269         } else {
3270                 if (sclp.hamax == U64_MAX)
3271                         kvm->arch.mem_limit = TASK_SIZE_MAX;
3272                 else
3273                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3274                                                     sclp.hamax + 1);
3275                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3276                 if (!kvm->arch.gmap)
3277                         goto out_err;
3278                 kvm->arch.gmap->private = kvm;
3279                 kvm->arch.gmap->pfault_enabled = 0;
3280         }
3281
3282         kvm->arch.use_pfmfi = sclp.has_pfmfi;
3283         kvm->arch.use_skf = sclp.has_skey;
3284         spin_lock_init(&kvm->arch.start_stop_lock);
3285         kvm_s390_vsie_init(kvm);
3286         if (use_gisa)
3287                 kvm_s390_gisa_init(kvm);
3288         INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3289         kvm->arch.pv.set_aside = NULL;
3290         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3291
3292         return 0;
3293 out_err:
3294         free_page((unsigned long)kvm->arch.sie_page2);
3295         debug_unregister(kvm->arch.dbf);
3296         sca_dispose(kvm);
3297         KVM_EVENT(3, "creation of vm failed: %d", rc);
3298         return rc;
3299 }
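
/*
 * Illustrative user-space sketch (not part of this file): the type argument
 * above arrives from KVM_CREATE_VM. A regular guest passes 0, a user
 * controlled guest passes KVM_VM_S390_UCONTROL and needs CAP_SYS_ADMIN
 * (includes and error handling omitted):
 *
 *      int kvm_fd = open("/dev/kvm", O_RDWR);
 *      int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 *      int cpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 */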
3300
3301 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3302 {
3303         u16 rc, rrc;
3304
3305         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3306         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3307         kvm_s390_clear_local_irqs(vcpu);
3308         kvm_clear_async_pf_completion_queue(vcpu);
3309         if (!kvm_is_ucontrol(vcpu->kvm))
3310                 sca_del_vcpu(vcpu);
3311         kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3312
3313         if (kvm_is_ucontrol(vcpu->kvm))
3314                 gmap_remove(vcpu->arch.gmap);
3315
3316         if (vcpu->kvm->arch.use_cmma)
3317                 kvm_s390_vcpu_unsetup_cmma(vcpu);
3318         /* We cannot hold the vcpu mutex here; we are already dying */
3319         if (kvm_s390_pv_cpu_get_handle(vcpu))
3320                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3321         free_page((unsigned long)(vcpu->arch.sie_block));
3322 }
3323
3324 void kvm_arch_destroy_vm(struct kvm *kvm)
3325 {
3326         u16 rc, rrc;
3327
3328         kvm_destroy_vcpus(kvm);
3329         sca_dispose(kvm);
3330         kvm_s390_gisa_destroy(kvm);
3331         /*
3332          * We are already at the end of life and kvm->lock is not taken.
3333          * This is ok as the file descriptor is closed by now and nobody
3334          * can mess with the pv state.
3335          */
3336         kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3337         /*
3338          * Remove the mmu notifier only when the whole KVM VM is torn down,
3339          * and only if one was registered to begin with. If the VM is
3340          * currently not protected, but has previously been protected,
3341          * then it's possible that the notifier is still registered.
3342          */
3343         if (kvm->arch.pv.mmu_notifier.ops)
3344                 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3345
3346         debug_unregister(kvm->arch.dbf);
3347         free_page((unsigned long)kvm->arch.sie_page2);
3348         if (!kvm_is_ucontrol(kvm))
3349                 gmap_remove(kvm->arch.gmap);
3350         kvm_s390_destroy_adapters(kvm);
3351         kvm_s390_clear_float_irqs(kvm);
3352         kvm_s390_vsie_destroy(kvm);
3353         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3354 }
3355
3356 /* Section: vcpu related */
3357 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3358 {
3359         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3360         if (!vcpu->arch.gmap)
3361                 return -ENOMEM;
3362         vcpu->arch.gmap->private = vcpu->kvm;
3363
3364         return 0;
3365 }
3366
3367 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3368 {
3369         if (!kvm_s390_use_sca_entries())
3370                 return;
3371         read_lock(&vcpu->kvm->arch.sca_lock);
3372         if (vcpu->kvm->arch.use_esca) {
3373                 struct esca_block *sca = vcpu->kvm->arch.sca;
3374
3375                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3376                 sca->cpu[vcpu->vcpu_id].sda = 0;
3377         } else {
3378                 struct bsca_block *sca = vcpu->kvm->arch.sca;
3379
3380                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3381                 sca->cpu[vcpu->vcpu_id].sda = 0;
3382         }
3383         read_unlock(&vcpu->kvm->arch.sca_lock);
3384 }
3385
3386 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3387 {
3388         if (!kvm_s390_use_sca_entries()) {
3389                 phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3390
3391                 /* we still need the basic sca for the ipte control */
3392                 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3393                 vcpu->arch.sie_block->scaol = sca_phys;
3394                 return;
3395         }
3396         read_lock(&vcpu->kvm->arch.sca_lock);
3397         if (vcpu->kvm->arch.use_esca) {
3398                 struct esca_block *sca = vcpu->kvm->arch.sca;
3399                 phys_addr_t sca_phys = virt_to_phys(sca);
3400
3401                 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3402                 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3403                 vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3404                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3405                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3406         } else {
3407                 struct bsca_block *sca = vcpu->kvm->arch.sca;
3408                 phys_addr_t sca_phys = virt_to_phys(sca);
3409
3410                 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3411                 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3412                 vcpu->arch.sie_block->scaol = sca_phys;
3413                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3414         }
3415         read_unlock(&vcpu->kvm->arch.sca_lock);
3416 }
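
/*
 * Note on the *_bit_inv() helpers used above (added for clarity): they
 * number bits MSB-0 within each word, matching the architected layout of
 * the SCA's MCN mask, and thereby translate the Linux LSB-0 vcpu id into
 * that layout.
 */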
3417
3418 /* Basic SCA to Extended SCA data copy routines */
3419 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3420 {
3421         d->sda = s->sda;
3422         d->sigp_ctrl.c = s->sigp_ctrl.c;
3423         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3424 }
3425
3426 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3427 {
3428         int i;
3429
3430         d->ipte_control = s->ipte_control;
3431         d->mcn[0] = s->mcn;
3432         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3433                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3434 }
3435
3436 static int sca_switch_to_extended(struct kvm *kvm)
3437 {
3438         struct bsca_block *old_sca = kvm->arch.sca;
3439         struct esca_block *new_sca;
3440         struct kvm_vcpu *vcpu;
3441         unsigned long vcpu_idx;
3442         u32 scaol, scaoh;
3443         phys_addr_t new_sca_phys;
3444
3445         if (kvm->arch.use_esca)
3446                 return 0;
3447
3448         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3449         if (!new_sca)
3450                 return -ENOMEM;
3451
3452         new_sca_phys = virt_to_phys(new_sca);
3453         scaoh = new_sca_phys >> 32;
3454         scaol = new_sca_phys & ESCA_SCAOL_MASK;
3455
3456         kvm_s390_vcpu_block_all(kvm);
3457         write_lock(&kvm->arch.sca_lock);
3458
3459         sca_copy_b_to_e(new_sca, old_sca);
3460
3461         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3462                 vcpu->arch.sie_block->scaoh = scaoh;
3463                 vcpu->arch.sie_block->scaol = scaol;
3464                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3465         }
3466         kvm->arch.sca = new_sca;
3467         kvm->arch.use_esca = 1;
3468
3469         write_unlock(&kvm->arch.sca_lock);
3470         kvm_s390_vcpu_unblock_all(kvm);
3471
3472         free_page((unsigned long)old_sca);
3473
3474         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3475                  old_sca, kvm->arch.sca);
3476         return 0;
3477 }
3478
3479 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3480 {
3481         int rc;
3482
3483         if (!kvm_s390_use_sca_entries()) {
3484                 if (id < KVM_MAX_VCPUS)
3485                         return true;
3486                 return false;
3487         }
3488         if (id < KVM_S390_BSCA_CPU_SLOTS)
3489                 return true;
3490         if (!sclp.has_esca || !sclp.has_64bscao)
3491                 return false;
3492
3493         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3494
3495         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3496 }
3497
3498 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
3499 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3500 {
3501         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3502         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3503         vcpu->arch.cputm_start = get_tod_clock_fast();
3504         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3505 }
3506
3507 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
3508 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3509 {
3510         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3511         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3512         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3513         vcpu->arch.cputm_start = 0;
3514         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3515 }
3516
3517 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
3518 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3519 {
3520         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3521         vcpu->arch.cputm_enabled = true;
3522         __start_cpu_timer_accounting(vcpu);
3523 }
3524
3525 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
3526 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3527 {
3528         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3529         __stop_cpu_timer_accounting(vcpu);
3530         vcpu->arch.cputm_enabled = false;
3531 }
3532
3533 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3534 {
3535         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3536         __enable_cpu_timer_accounting(vcpu);
3537         preempt_enable();
3538 }
3539
3540 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3541 {
3542         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3543         __disable_cpu_timer_accounting(vcpu);
3544         preempt_enable();
3545 }
3546
3547 /* set the cpu timer - may only be called from the VCPU thread itself */
3548 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3549 {
3550         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3551         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3552         if (vcpu->arch.cputm_enabled)
3553                 vcpu->arch.cputm_start = get_tod_clock_fast();
3554         vcpu->arch.sie_block->cputm = cputm;
3555         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3556         preempt_enable();
3557 }
3558
3559 /* update and get the cpu timer - can also be called from other VCPU threads */
3560 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3561 {
3562         unsigned int seq;
3563         __u64 value;
3564
3565         if (unlikely(!vcpu->arch.cputm_enabled))
3566                 return vcpu->arch.sie_block->cputm;
3567
3568         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
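        /*
         * raw_read_seqcount() may return an odd value while an update is
         * in flight. Clearing the low bit for the retry check below makes
         * read_seqcount_retry() fail in that case, so a torn intermediate
         * value is never returned. A reader running inside the write-side
         * critical section on its own CPU would spin here forever, which
         * is what the WARN_ON_ONCE below is about.
         */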
3569         do {
3570                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3571                 /*
3572                  * If the writer would ever execute a read in the critical
3573                  * section, e.g. in irq context, we have a deadlock.
3574                  */
3575                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3576                 value = vcpu->arch.sie_block->cputm;
3577                 /* if cputm_start is 0, accounting is being started/stopped */
3578                 if (likely(vcpu->arch.cputm_start))
3579                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3580         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3581         preempt_enable();
3582         return value;
3583 }
3584
3585 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3586 {
3588         gmap_enable(vcpu->arch.enabled_gmap);
3589         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3590         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3591                 __start_cpu_timer_accounting(vcpu);
3592         vcpu->cpu = cpu;
3593 }
3594
3595 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3596 {
3597         vcpu->cpu = -1;
3598         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3599                 __stop_cpu_timer_accounting(vcpu);
3600         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3601         vcpu->arch.enabled_gmap = gmap_get_enabled();
3602         gmap_disable(vcpu->arch.enabled_gmap);
3604 }
3605
3606 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3607 {
3608         mutex_lock(&vcpu->kvm->lock);
3609         preempt_disable();
3610         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3611         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3612         preempt_enable();
3613         mutex_unlock(&vcpu->kvm->lock);
3614         if (!kvm_is_ucontrol(vcpu->kvm)) {
3615                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3616                 sca_add_vcpu(vcpu);
3617         }
3618         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3619                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3620         /* make vcpu_load load the right gmap on the first trigger */
3621         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3622 }
3623
3624 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3625 {
3626         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3627             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3628                 return true;
3629         return false;
3630 }
3631
3632 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3633 {
3634         /* At least one ECC subfunction must be present */
3635         return kvm_has_pckmo_subfunc(kvm, 32) ||
3636                kvm_has_pckmo_subfunc(kvm, 33) ||
3637                kvm_has_pckmo_subfunc(kvm, 34) ||
3638                kvm_has_pckmo_subfunc(kvm, 40) ||
3639                kvm_has_pckmo_subfunc(kvm, 41);
3641 }
3642
3643 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3644 {
3645         /*
3646          * If the AP instructions are not being interpreted and the MSAX3
3647          * facility is not configured for the guest, there is nothing to set up.
3648          */
3649         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3650                 return;
3651
3652         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3653         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3654         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3655         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3656
3657         if (vcpu->kvm->arch.crypto.apie)
3658                 vcpu->arch.sie_block->eca |= ECA_APIE;
3659
3660         /* Set up protected key support */
3661         if (vcpu->kvm->arch.crypto.aes_kw) {
3662                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3663                 /* ecc is also wrapped with AES key */
3664                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3665                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3666         }
3667
3668         if (vcpu->kvm->arch.crypto.dea_kw)
3669                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3670 }
3671
3672 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3673 {
3674         free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3675         vcpu->arch.sie_block->cbrlo = 0;
3676 }
3677
3678 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3679 {
3680         void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3681
3682         if (!cbrlo_page)
3683                 return -ENOMEM;
3684
3685         vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3686         return 0;
3687 }
3688
3689 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3690 {
3691         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3692
3693         vcpu->arch.sie_block->ibc = model->ibc;
3694         if (test_kvm_facility(vcpu->kvm, 7))
3695                 vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3696 }
3697
3698 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3699 {
3700         int rc = 0;
3701         u16 uvrc, uvrrc;
3702
3703         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3704                                                     CPUSTAT_SM |
3705                                                     CPUSTAT_STOPPED);
3706
3707         if (test_kvm_facility(vcpu->kvm, 78))
3708                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3709         else if (test_kvm_facility(vcpu->kvm, 8))
3710                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3711
3712         kvm_s390_vcpu_setup_model(vcpu);
3713
3714         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3715         if (MACHINE_HAS_ESOP)
3716                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3717         if (test_kvm_facility(vcpu->kvm, 9))
3718                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3719         if (test_kvm_facility(vcpu->kvm, 11))
3720                 vcpu->arch.sie_block->ecb |= ECB_PTF;
3721         if (test_kvm_facility(vcpu->kvm, 73))
3722                 vcpu->arch.sie_block->ecb |= ECB_TE;
3723         if (!kvm_is_ucontrol(vcpu->kvm))
3724                 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3725
3726         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3727                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3728         if (test_kvm_facility(vcpu->kvm, 130))
3729                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3730         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3731         if (sclp.has_cei)
3732                 vcpu->arch.sie_block->eca |= ECA_CEI;
3733         if (sclp.has_ib)
3734                 vcpu->arch.sie_block->eca |= ECA_IB;
3735         if (sclp.has_siif)
3736                 vcpu->arch.sie_block->eca |= ECA_SII;
3737         if (sclp.has_sigpif)
3738                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3739         if (test_kvm_facility(vcpu->kvm, 129)) {
3740                 vcpu->arch.sie_block->eca |= ECA_VX;
3741                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3742         }
3743         if (test_kvm_facility(vcpu->kvm, 139))
3744                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3745         if (test_kvm_facility(vcpu->kvm, 156))
3746                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3747         if (vcpu->arch.sie_block->gd) {
3748                 vcpu->arch.sie_block->eca |= ECA_AIV;
3749                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3750                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3751         }
3752         vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3753         vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3754
3755         if (sclp.has_kss)
3756                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3757         else
3758                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3759
3760         if (vcpu->kvm->arch.use_cmma) {
3761                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3762                 if (rc)
3763                         return rc;
3764         }
3765         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3766         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3767
3768         vcpu->arch.sie_block->hpid = HPID_KVM;
3769
3770         kvm_s390_vcpu_crypto_setup(vcpu);
3771
3772         kvm_s390_vcpu_pci_setup(vcpu);
3773
3774         mutex_lock(&vcpu->kvm->lock);
3775         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3776                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3777                 if (rc)
3778                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3779         }
3780         mutex_unlock(&vcpu->kvm->lock);
3781
3782         return rc;
3783 }
3784
3785 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3786 {
3787         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3788                 return -EINVAL;
3789         return 0;
3790 }
3791
3792 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3793 {
3794         struct sie_page *sie_page;
3795         int rc;
3796
3797         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3798         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3799         if (!sie_page)
3800                 return -ENOMEM;
3801
3802         vcpu->arch.sie_block = &sie_page->sie_block;
3803         vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3804
3805         /* the real guest size will always be smaller than msl */
3806         vcpu->arch.sie_block->mso = 0;
3807         vcpu->arch.sie_block->msl = sclp.hamax;
3808
3809         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3810         spin_lock_init(&vcpu->arch.local_int.lock);
3811         vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3812         seqcount_init(&vcpu->arch.cputm_seqcount);
3813
3814         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3815         kvm_clear_async_pf_completion_queue(vcpu);
3816         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3817                                     KVM_SYNC_GPRS |
3818                                     KVM_SYNC_ACRS |
3819                                     KVM_SYNC_CRS |
3820                                     KVM_SYNC_ARCH0 |
3821                                     KVM_SYNC_PFAULT |
3822                                     KVM_SYNC_DIAG318;
3823         kvm_s390_set_prefix(vcpu, 0);
3824         if (test_kvm_facility(vcpu->kvm, 64))
3825                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3826         if (test_kvm_facility(vcpu->kvm, 82))
3827                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3828         if (test_kvm_facility(vcpu->kvm, 133))
3829                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3830         if (test_kvm_facility(vcpu->kvm, 156))
3831                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3832         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3833          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3834          */
3835         if (MACHINE_HAS_VX)
3836                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3837         else
3838                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3839
3840         if (kvm_is_ucontrol(vcpu->kvm)) {
3841                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3842                 if (rc)
3843                         goto out_free_sie_block;
3844         }
3845
3846         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3847                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3848         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3849
3850         rc = kvm_s390_vcpu_setup(vcpu);
3851         if (rc)
3852                 goto out_ucontrol_uninit;
3853
3854         kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3855         return 0;
3856
3857 out_ucontrol_uninit:
3858         if (kvm_is_ucontrol(vcpu->kvm))
3859                 gmap_remove(vcpu->arch.gmap);
3860 out_free_sie_block:
3861         free_page((unsigned long)(vcpu->arch.sie_block));
3862         return rc;
3863 }
3864
3865 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3866 {
3867         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3868         return kvm_s390_vcpu_has_irq(vcpu, 0);
3869 }
3870
3871 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3872 {
3873         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3874 }
3875
3876 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3877 {
3878         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3879         exit_sie(vcpu);
3880 }
3881
3882 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3883 {
3884         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3885 }
3886
3887 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3888 {
3889         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3890         exit_sie(vcpu);
3891 }
3892
3893 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3894 {
3895         return atomic_read(&vcpu->arch.sie_block->prog20) &
3896                (PROG_BLOCK_SIE | PROG_REQUEST);
3897 }
3898
3899 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3900 {
3901         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3902 }
3903
3904 /*
3905  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3906  * If the CPU is not running (e.g. waiting as idle) the function will
3907  * return immediately.
 */
3908 void exit_sie(struct kvm_vcpu *vcpu)
3909 {
3910         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3911         kvm_s390_vsie_kick(vcpu);
3912         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3913                 cpu_relax();
3914 }
3915
3916 /* Kick a guest cpu out of SIE to process a request synchronously */
3917 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3918 {
3919         __kvm_make_request(req, vcpu);
3920         kvm_s390_vcpu_request(vcpu);
3921 }
3922
3923 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3924                               unsigned long end)
3925 {
3926         struct kvm *kvm = gmap->private;
3927         struct kvm_vcpu *vcpu;
3928         unsigned long prefix;
3929         unsigned long i;
3930
3931         if (gmap_is_shadow(gmap))
3932                 return;
3933         if (start >= 1UL << 31)
3934                 /* We are only interested in prefix pages */
3935                 return;
3936         kvm_for_each_vcpu(i, vcpu, kvm) {
3937                 /* match against both prefix pages */
3938                 prefix = kvm_s390_get_prefix(vcpu);
3939                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3940                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3941                                    start, end);
3942                         kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3943                 }
3944         }
3945 }
3946
3947 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3948 {
3949         /* do not poll with more than halt_poll_max_steal percent of steal time */
3950         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3951             READ_ONCE(halt_poll_max_steal)) {
3952                 vcpu->stat.halt_no_poll_steal++;
3953                 return true;
3954         }
3955         return false;
3956 }
3957
3958 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3959 {
3960         /* kvm common code refers to this, but never calls it */
3961         BUG();
3962         return 0;
3963 }
3964
3965 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3966                                            struct kvm_one_reg *reg)
3967 {
3968         int r = -EINVAL;
3969
3970         switch (reg->id) {
3971         case KVM_REG_S390_TODPR:
3972                 r = put_user(vcpu->arch.sie_block->todpr,
3973                              (u32 __user *)reg->addr);
3974                 break;
3975         case KVM_REG_S390_EPOCHDIFF:
3976                 r = put_user(vcpu->arch.sie_block->epoch,
3977                              (u64 __user *)reg->addr);
3978                 break;
3979         case KVM_REG_S390_CPU_TIMER:
3980                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3981                              (u64 __user *)reg->addr);
3982                 break;
3983         case KVM_REG_S390_CLOCK_COMP:
3984                 r = put_user(vcpu->arch.sie_block->ckc,
3985                              (u64 __user *)reg->addr);
3986                 break;
3987         case KVM_REG_S390_PFTOKEN:
3988                 r = put_user(vcpu->arch.pfault_token,
3989                              (u64 __user *)reg->addr);
3990                 break;
3991         case KVM_REG_S390_PFCOMPARE:
3992                 r = put_user(vcpu->arch.pfault_compare,
3993                              (u64 __user *)reg->addr);
3994                 break;
3995         case KVM_REG_S390_PFSELECT:
3996                 r = put_user(vcpu->arch.pfault_select,
3997                              (u64 __user *)reg->addr);
3998                 break;
3999         case KVM_REG_S390_PP:
4000                 r = put_user(vcpu->arch.sie_block->pp,
4001                              (u64 __user *)reg->addr);
4002                 break;
4003         case KVM_REG_S390_GBEA:
4004                 r = put_user(vcpu->arch.sie_block->gbea,
4005                              (u64 __user *)reg->addr);
4006                 break;
4007         default:
4008                 break;
4009         }
4010
4011         return r;
4012 }
4013
4014 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4015                                            struct kvm_one_reg *reg)
4016 {
4017         int r = -EINVAL;
4018         __u64 val;
4019
4020         switch (reg->id) {
4021         case KVM_REG_S390_TODPR:
4022                 r = get_user(vcpu->arch.sie_block->todpr,
4023                              (u32 __user *)reg->addr);
4024                 break;
4025         case KVM_REG_S390_EPOCHDIFF:
4026                 r = get_user(vcpu->arch.sie_block->epoch,
4027                              (u64 __user *)reg->addr);
4028                 break;
4029         case KVM_REG_S390_CPU_TIMER:
4030                 r = get_user(val, (u64 __user *)reg->addr);
4031                 if (!r)
4032                         kvm_s390_set_cpu_timer(vcpu, val);
4033                 break;
4034         case KVM_REG_S390_CLOCK_COMP:
4035                 r = get_user(vcpu->arch.sie_block->ckc,
4036                              (u64 __user *)reg->addr);
4037                 break;
4038         case KVM_REG_S390_PFTOKEN:
4039                 r = get_user(vcpu->arch.pfault_token,
4040                              (u64 __user *)reg->addr);
4041                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4042                         kvm_clear_async_pf_completion_queue(vcpu);
4043                 break;
4044         case KVM_REG_S390_PFCOMPARE:
4045                 r = get_user(vcpu->arch.pfault_compare,
4046                              (u64 __user *)reg->addr);
4047                 break;
4048         case KVM_REG_S390_PFSELECT:
4049                 r = get_user(vcpu->arch.pfault_select,
4050                              (u64 __user *)reg->addr);
4051                 break;
4052         case KVM_REG_S390_PP:
4053                 r = get_user(vcpu->arch.sie_block->pp,
4054                              (u64 __user *)reg->addr);
4055                 break;
4056         case KVM_REG_S390_GBEA:
4057                 r = get_user(vcpu->arch.sie_block->gbea,
4058                              (u64 __user *)reg->addr);
4059                 break;
4060         default:
4061                 break;
4062         }
4063
4064         return r;
4065 }
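
/*
 * Illustrative user-space sketch (not part of this file): the registers
 * handled above are accessed with the generic one-reg ioctls on the vcpu
 * fd, e.g. reading the CPU timer ("vcpu_fd" is an assumption):
 *
 *      __u64 cputm;
 *      struct kvm_one_reg reg = {
 *              .id   = KVM_REG_S390_CPU_TIMER,
 *              .addr = (__u64)(unsigned long)&cputm,
 *      };
 *
 *      if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) == 0)
 *              ; // cputm now holds the guest CPU timer
 */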
4066
4067 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4068 {
4069         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4070         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4071         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4072
4073         kvm_clear_async_pf_completion_queue(vcpu);
4074         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4075                 kvm_s390_vcpu_stop(vcpu);
4076         kvm_s390_clear_local_irqs(vcpu);
4077 }
4078
4079 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4080 {
4081         /* Initial reset is a superset of the normal reset */
4082         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4083
4084         /*
4085          * This equals the initial cpu reset in the PoP, but we don't switch
4086          * to ESA. We not only reset the internal data, but also ...
4087          */
4088         vcpu->arch.sie_block->gpsw.mask = 0;
4089         vcpu->arch.sie_block->gpsw.addr = 0;
4090         kvm_s390_set_prefix(vcpu, 0);
4091         kvm_s390_set_cpu_timer(vcpu, 0);
4092         vcpu->arch.sie_block->ckc = 0;
4093         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4094         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4095         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4096
4097         /* ... the data in sync regs */
4098         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4099         vcpu->run->s.regs.ckc = 0;
4100         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4101         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4102         vcpu->run->psw_addr = 0;
4103         vcpu->run->psw_mask = 0;
4104         vcpu->run->s.regs.todpr = 0;
4105         vcpu->run->s.regs.cputm = 0;
4106         vcpu->run->s.regs.ckc = 0;
4107         vcpu->run->s.regs.pp = 0;
4108         vcpu->run->s.regs.gbea = 1;
4109         vcpu->run->s.regs.fpc = 0;
4110         /*
4111          * Do not reset these registers in the protected case, as some of
4112          * them are overlaid and they are not accessible in this case
4113          * anyway.
4114          */
4115         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4116                 vcpu->arch.sie_block->gbea = 1;
4117                 vcpu->arch.sie_block->pp = 0;
4118                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4119                 vcpu->arch.sie_block->todpr = 0;
4120         }
4121 }
4122
4123 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4124 {
4125         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4126
4127         /* Clear reset is a superset of the initial reset */
4128         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4129
4130         memset(&regs->gprs, 0, sizeof(regs->gprs));
4131         memset(&regs->vrs, 0, sizeof(regs->vrs));
4132         memset(&regs->acrs, 0, sizeof(regs->acrs));
4133         memset(&regs->gscb, 0, sizeof(regs->gscb));
4134
4135         regs->etoken = 0;
4136         regs->etoken_extension = 0;
4137 }
4138
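/*
 * Editor's sketch, not part of the original file: the three reset handlers
 * above form a strict hierarchy (clear implies initial implies normal).
 * Userspace reaches them through the vcpu ioctls dispatched further down
 * in this file; a minimal, hypothetical VMM call could look like:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	// strongest variant: also zeroes gprs, vrs, acrs, gscb and etoken
 *	if (ioctl(vcpu_fd, KVM_S390_CLEAR_RESET, 0) < 0)
 *		perror("KVM_S390_CLEAR_RESET");
 *
 * The reset ioctls carry no payload; vcpu_fd is an assumed open vcpu fd.
 */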
4139 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4140 {
4141         vcpu_load(vcpu);
4142         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4143         vcpu_put(vcpu);
4144         return 0;
4145 }
4146
4147 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4148 {
4149         vcpu_load(vcpu);
4150         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4151         vcpu_put(vcpu);
4152         return 0;
4153 }
4154
4155 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4156                                   struct kvm_sregs *sregs)
4157 {
4158         vcpu_load(vcpu);
4159
4160         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4161         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4162
4163         vcpu_put(vcpu);
4164         return 0;
4165 }
4166
4167 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4168                                   struct kvm_sregs *sregs)
4169 {
4170         vcpu_load(vcpu);
4171
4172         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4173         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4174
4175         vcpu_put(vcpu);
4176         return 0;
4177 }
4178
4179 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4180 {
4181         int ret = 0;
4182
4183         vcpu_load(vcpu);
4184
4185         if (test_fp_ctl(fpu->fpc)) {
4186                 ret = -EINVAL;
4187                 goto out;
4188         }
4189         vcpu->run->s.regs.fpc = fpu->fpc;
4190         if (MACHINE_HAS_VX)
4191                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4192                                  (freg_t *) fpu->fprs);
4193         else
4194                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4195
4196 out:
4197         vcpu_put(vcpu);
4198         return ret;
4199 }
4200
4201 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4202 {
4203         vcpu_load(vcpu);
4204
4205         /* make sure we have the latest values */
4206         save_fpu_regs();
4207         if (MACHINE_HAS_VX)
4208                 convert_vx_to_fp((freg_t *) fpu->fprs,
4209                                  (__vector128 *) vcpu->run->s.regs.vrs);
4210         else
4211                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4212         fpu->fpc = vcpu->run->s.regs.fpc;
4213
4214         vcpu_put(vcpu);
4215         return 0;
4216 }
4217
4218 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4219 {
4220         int rc = 0;
4221
4222         if (!is_vcpu_stopped(vcpu))
4223                 rc = -EBUSY;
4224         else {
4225                 vcpu->run->psw_mask = psw.mask;
4226                 vcpu->run->psw_addr = psw.addr;
4227         }
4228         return rc;
4229 }
4230
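/*
 * Editor's sketch, hypothetical userspace usage: the helper above backs the
 * KVM_S390_SET_INITIAL_PSW ioctl and returns -EBUSY unless the vcpu is
 * stopped, so a VMM sets the PSW before starting the cpu. The values below
 * are made up for illustration:
 *
 *	struct kvm_s390_psw psw = {
 *		.mask = 0x0000000180000000ULL,	// EA|BA: 64-bit addressing
 *		.addr = 0x10000,		// assumed guest entry point
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw) < 0)
 *		perror("KVM_S390_SET_INITIAL_PSW");	// EBUSY: not stopped
 */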
4231 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4232                                   struct kvm_translation *tr)
4233 {
4234         return -EINVAL; /* not implemented yet */
4235 }
4236
4237 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4238                               KVM_GUESTDBG_USE_HW_BP | \
4239                               KVM_GUESTDBG_ENABLE)
4240
4241 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4242                                         struct kvm_guest_debug *dbg)
4243 {
4244         int rc = 0;
4245
4246         vcpu_load(vcpu);
4247
4248         vcpu->guest_debug = 0;
4249         kvm_s390_clear_bp_data(vcpu);
4250
4251         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4252                 rc = -EINVAL;
4253                 goto out;
4254         }
4255         if (!sclp.has_gpere) {
4256                 rc = -EINVAL;
4257                 goto out;
4258         }
4259
4260         if (dbg->control & KVM_GUESTDBG_ENABLE) {
4261                 vcpu->guest_debug = dbg->control;
4262                 /* enforce guest PER */
4263                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4264
4265                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4266                         rc = kvm_s390_import_bp_data(vcpu, dbg);
4267         } else {
4268                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4269                 vcpu->arch.guestdbg.last_bp = 0;
4270         }
4271
4272         if (rc) {
4273                 vcpu->guest_debug = 0;
4274                 kvm_s390_clear_bp_data(vcpu);
4275                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4276         }
4277
4278 out:
4279         vcpu_put(vcpu);
4280         return rc;
4281 }
4282
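/*
 * Editor's sketch, hypothetical usage: only the flags collected in
 * VALID_GUESTDBG_FLAGS are accepted above, and the whole interface depends
 * on the SCLP GPERE facility. Enabling single-stepping could look like:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
 *		perror("KVM_SET_GUEST_DEBUG");	// EINVAL without GPERE
 */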
4283 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4284                                     struct kvm_mp_state *mp_state)
4285 {
4286         int ret;
4287
4288         vcpu_load(vcpu);
4289
4290         /* CHECK_STOP and LOAD are not supported yet */
4291         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4292                                       KVM_MP_STATE_OPERATING;
4293
4294         vcpu_put(vcpu);
4295         return ret;
4296 }
4297
4298 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4299                                     struct kvm_mp_state *mp_state)
4300 {
4301         int rc = 0;
4302
4303         vcpu_load(vcpu);
4304
4305         /* user space knows about this interface - let it control the state */
4306         kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4307
4308         switch (mp_state->mp_state) {
4309         case KVM_MP_STATE_STOPPED:
4310                 rc = kvm_s390_vcpu_stop(vcpu);
4311                 break;
4312         case KVM_MP_STATE_OPERATING:
4313                 rc = kvm_s390_vcpu_start(vcpu);
4314                 break;
4315         case KVM_MP_STATE_LOAD:
4316                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4317                         rc = -ENXIO;
4318                         break;
4319                 }
4320                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4321                 break;
4322         case KVM_MP_STATE_CHECK_STOP:
4323                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
4324         default:
4325                 rc = -ENXIO;
4326         }
4327
4328         vcpu_put(vcpu);
4329         return rc;
4330 }
4331
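/*
 * Editor's note with a hypothetical snippet: the first KVM_SET_MP_STATE
 * call above has a lasting side effect: it switches the VM to
 * user-controlled cpu state, after which KVM_RUN no longer starts a
 * stopped vcpu implicitly (see kvm_arch_vcpu_ioctl_run below).
 *
 *	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_OPERATING };
 *	if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &st) < 0)
 *		perror("KVM_SET_MP_STATE");
 */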
4332 static bool ibs_enabled(struct kvm_vcpu *vcpu)
4333 {
4334         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4335 }
4336
4337 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4338 {
4339 retry:
4340         kvm_s390_vcpu_request_handled(vcpu);
4341         if (!kvm_request_pending(vcpu))
4342                 return 0;
4343         /*
4344          * If the guest prefix changed, re-arm the ipte notifier for the
4345          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4346          * This ensures that the ipte instruction for this request has
4347          * already finished. We might race against a second unmapper that
4348          * wants to set the blocking bit. Let's just retry the request loop.
4349          */
4350         if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4351                 int rc;
4352                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
4353                                           kvm_s390_get_prefix(vcpu),
4354                                           PAGE_SIZE * 2, PROT_WRITE);
4355                 if (rc) {
4356                         kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4357                         return rc;
4358                 }
4359                 goto retry;
4360         }
4361
4362         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4363                 vcpu->arch.sie_block->ihcpu = 0xffff;
4364                 goto retry;
4365         }
4366
4367         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4368                 if (!ibs_enabled(vcpu)) {
4369                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4370                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4371                 }
4372                 goto retry;
4373         }
4374
4375         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4376                 if (ibs_enabled(vcpu)) {
4377                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4378                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4379                 }
4380                 goto retry;
4381         }
4382
4383         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4384                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4385                 goto retry;
4386         }
4387
4388         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4389                 /*
4390                  * Disable CMM virtualization; we will emulate the ESSA
4391                  * instruction manually, in order to provide additional
4392                  * functionalities needed for live migration.
4393                  */
4394                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4395                 goto retry;
4396         }
4397
4398         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4399                 /*
4400                  * Re-enable CMM virtualization if CMMA is available and
4401                  * CMM has been used.
4402                  */
4403                 if ((vcpu->kvm->arch.use_cmma) &&
4404                     (vcpu->kvm->mm->context.uses_cmm))
4405                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4406                 goto retry;
4407         }
4408
4409         /* we left the vsie handler, nothing to do, just clear the request */
4410         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4411
4412         return 0;
4413 }
4414
4415 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4416 {
4417         struct kvm_vcpu *vcpu;
4418         union tod_clock clk;
4419         unsigned long i;
4420
4421         preempt_disable();
4422
4423         store_tod_clock_ext(&clk);
4424
4425         kvm->arch.epoch = gtod->tod - clk.tod;
4426         kvm->arch.epdx = 0;
4427         if (test_kvm_facility(kvm, 139)) {
4428                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4429                 if (kvm->arch.epoch > gtod->tod)
4430                         kvm->arch.epdx -= 1;
4431         }
4432
4433         kvm_s390_vcpu_block_all(kvm);
4434         kvm_for_each_vcpu(i, vcpu, kvm) {
4435                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4436                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
4437         }
4438
4439         kvm_s390_vcpu_unblock_all(kvm);
4440         preempt_enable();
4441 }
4442
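/*
 * Editor's worked example, values made up: the guest TOD is host TOD plus
 * epoch, so the helper above stores epoch = gtod->tod - clk.tod in
 * wrapping u64 arithmetic. With the multiple-epoch facility (139), epdx
 * extends the clock, and a wrapped subtraction (detectable because the
 * resulting epoch is then numerically greater than gtod->tod) propagates
 * a borrow by decrementing epdx. For instance, clk.tod = U64_MAX and
 * gtod->tod = 1 yield epoch = 2; since 2 > 1, epdx is decremented.
 */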
4443 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4444 {
4445         if (!mutex_trylock(&kvm->lock))
4446                 return 0;
4447         __kvm_s390_set_tod_clock(kvm, gtod);
4448         mutex_unlock(&kvm->lock);
4449         return 1;
4450 }
4451
4452 /**
4453  * kvm_arch_fault_in_page - fault-in guest page if necessary
4454  * @vcpu: The corresponding virtual cpu
4455  * @gpa: Guest physical address
4456  * @writable: Whether the page should be writable or not
4457  *
4458  * Make sure that a guest page has been faulted-in on the host.
4459  *
4460  * Return: Zero on success, negative error code otherwise.
4461  */
4462 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4463 {
4464         return gmap_fault(vcpu->arch.gmap, gpa,
4465                           writable ? FAULT_FLAG_WRITE : 0);
4466 }
4467
4468 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4469                                       unsigned long token)
4470 {
4471         struct kvm_s390_interrupt inti;
4472         struct kvm_s390_irq irq;
4473
4474         if (start_token) {
4475                 irq.u.ext.ext_params2 = token;
4476                 irq.type = KVM_S390_INT_PFAULT_INIT;
4477                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4478         } else {
4479                 inti.type = KVM_S390_INT_PFAULT_DONE;
4480                 inti.parm64 = token;
4481                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4482         }
4483 }
4484
4485 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4486                                      struct kvm_async_pf *work)
4487 {
4488         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4489         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4490
4491         return true;
4492 }
4493
4494 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4495                                  struct kvm_async_pf *work)
4496 {
4497         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4498         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4499 }
4500
4501 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4502                                struct kvm_async_pf *work)
4503 {
4504         /* s390 will always inject the page directly */
4505 }
4506
4507 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4508 {
4509         /*
4510          * s390 will always inject the page directly,
4511          * but we still want kvm_check_async_pf_completion() to clean up
4512          */
4513         return true;
4514 }
4515
4516 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4517 {
4518         hva_t hva;
4519         struct kvm_arch_async_pf arch;
4520
4521         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4522                 return false;
4523         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4524             vcpu->arch.pfault_compare)
4525                 return false;
4526         if (psw_extint_disabled(vcpu))
4527                 return false;
4528         if (kvm_s390_vcpu_has_irq(vcpu, 0))
4529                 return false;
4530         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4531                 return false;
4532         if (!vcpu->arch.gmap->pfault_enabled)
4533                 return false;
4534
4535         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4536         hva += current->thread.gmap_addr & ~PAGE_MASK;
4537         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4538                 return false;
4539
4540         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4541 }
4542
4543 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4544 {
4545         int rc, cpuflags;
4546
4547         /*
4548          * On s390 notifications for arriving pages will be delivered directly
4549          * to the guest but the housekeeping for completed pfaults is
4550          * handled outside the worker.
4551          */
4552         kvm_check_async_pf_completion(vcpu);
4553
4554         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4555         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4556
4557         if (need_resched())
4558                 schedule();
4559
4560         if (!kvm_is_ucontrol(vcpu->kvm)) {
4561                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4562                 if (rc)
4563                         return rc;
4564         }
4565
4566         rc = kvm_s390_handle_requests(vcpu);
4567         if (rc)
4568                 return rc;
4569
4570         if (guestdbg_enabled(vcpu)) {
4571                 kvm_s390_backup_guest_per_regs(vcpu);
4572                 kvm_s390_patch_guest_per_regs(vcpu);
4573         }
4574
4575         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4576
4577         vcpu->arch.sie_block->icptcode = 0;
4578         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4579         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4580         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4581
4582         return 0;
4583 }
4584
4585 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4586 {
4587         struct kvm_s390_pgm_info pgm_info = {
4588                 .code = PGM_ADDRESSING,
4589         };
4590         u8 opcode, ilen;
4591         int rc;
4592
4593         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4594         trace_kvm_s390_sie_fault(vcpu);
4595
4596         /*
4597          * We want to inject an addressing exception, which is defined as a
4598          * suppressing or terminating exception. However, since we came here
4599          * by a DAT access exception, the PSW still points to the faulting
4600          * instruction since DAT exceptions are nullifying. So we've got
4601          * to look up the current opcode to get the length of the instruction
4602          * to be able to forward the PSW.
4603          */
4604         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4605         ilen = insn_length(opcode);
4606         if (rc < 0) {
4607                 return rc;
4608         } else if (rc) {
4609                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4610                  * Forward by arbitrary ilc, injection will take care of
4611                  * nullification if necessary.
4612                  */
4613                 pgm_info = vcpu->arch.pgm;
4614                 ilen = 4;
4615         }
4616         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4617         kvm_s390_forward_psw(vcpu, ilen);
4618         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4619 }
4620
4621 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4622 {
4623         struct mcck_volatile_info *mcck_info;
4624         struct sie_page *sie_page;
4625
4626         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4627                    vcpu->arch.sie_block->icptcode);
4628         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4629
4630         if (guestdbg_enabled(vcpu))
4631                 kvm_s390_restore_guest_per_regs(vcpu);
4632
4633         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4634         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4635
4636         if (exit_reason == -EINTR) {
4637                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4638                 sie_page = container_of(vcpu->arch.sie_block,
4639                                         struct sie_page, sie_block);
4640                 mcck_info = &sie_page->mcck_info;
4641                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4642                 return 0;
4643         }
4644
4645         if (vcpu->arch.sie_block->icptcode > 0) {
4646                 int rc = kvm_handle_sie_intercept(vcpu);
4647
4648                 if (rc != -EOPNOTSUPP)
4649                         return rc;
4650                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4651                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4652                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4653                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4654                 return -EREMOTE;
4655         } else if (exit_reason != -EFAULT) {
4656                 vcpu->stat.exit_null++;
4657                 return 0;
4658         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4659                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4660                 vcpu->run->s390_ucontrol.trans_exc_code =
4661                                                 current->thread.gmap_addr;
4662                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4663                 return -EREMOTE;
4664         } else if (current->thread.gmap_pfault) {
4665                 trace_kvm_s390_major_guest_pfault(vcpu);
4666                 current->thread.gmap_pfault = 0;
4667                 if (kvm_arch_setup_async_pf(vcpu))
4668                         return 0;
4669                 vcpu->stat.pfault_sync++;
4670                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4671         }
4672         return vcpu_post_run_fault_in_sie(vcpu);
4673 }
4674
4675 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4676 static int __vcpu_run(struct kvm_vcpu *vcpu)
4677 {
4678         int rc, exit_reason;
4679         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4680
4681         /*
4682          * We try to hold kvm->srcu during most of vcpu_run (except when
4683          * running the guest), so that memslots (and other stuff) are protected.
4684          */
4685         kvm_vcpu_srcu_read_lock(vcpu);
4686
4687         do {
4688                 rc = vcpu_pre_run(vcpu);
4689                 if (rc)
4690                         break;
4691
4692                 kvm_vcpu_srcu_read_unlock(vcpu);
4693                 /*
4694                  * As PF_VCPU will be used in the fault handler, there must
4695                  * be no uaccess between guest_enter and guest_exit.
4696                  */
4697                 local_irq_disable();
4698                 guest_enter_irqoff();
4699                 __disable_cpu_timer_accounting(vcpu);
4700                 local_irq_enable();
4701                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4702                         memcpy(sie_page->pv_grregs,
4703                                vcpu->run->s.regs.gprs,
4704                                sizeof(sie_page->pv_grregs));
4705                 }
4706                 if (test_cpu_flag(CIF_FPU))
4707                         load_fpu_regs();
4708                 exit_reason = sie64a(vcpu->arch.sie_block,
4709                                      vcpu->run->s.regs.gprs);
4710                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4711                         memcpy(vcpu->run->s.regs.gprs,
4712                                sie_page->pv_grregs,
4713                                sizeof(sie_page->pv_grregs));
4714                         /*
4715                          * We're not allowed to inject interrupts on intercepts
4716                          * that leave the guest state in an "in-between" state
4717                          * where the next SIE entry will do a continuation.
4718                          * Fence interrupts in our "internal" PSW.
4719                          */
4720                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4721                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4722                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4723                         }
4724                 }
4725                 local_irq_disable();
4726                 __enable_cpu_timer_accounting(vcpu);
4727                 guest_exit_irqoff();
4728                 local_irq_enable();
4729                 kvm_vcpu_srcu_read_lock(vcpu);
4730
4731                 rc = vcpu_post_run(vcpu, exit_reason);
4732         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4733
4734         kvm_vcpu_srcu_read_unlock(vcpu);
4735         return rc;
4736 }
4737
4738 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4739 {
4740         struct kvm_run *kvm_run = vcpu->run;
4741         struct runtime_instr_cb *riccb;
4742         struct gs_cb *gscb;
4743
4744         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4745         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4746         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4747         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4748         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4749                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4750                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4751                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4752         }
4753         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4754                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4755                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4756                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4757                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4758                         kvm_clear_async_pf_completion_queue(vcpu);
4759         }
4760         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4761                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4762                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4763                 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4764         }
4765         /*
4766          * If userspace sets the riccb (e.g. after migration) to a valid state,
4767          * we should enable RI here instead of doing the lazy enablement.
4768          */
4769         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4770             test_kvm_facility(vcpu->kvm, 64) &&
4771             riccb->v &&
4772             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4773                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4774                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4775         }
4776         /*
4777          * If userspace sets the gscb (e.g. after migration) to non-zero,
4778          * we should enable GS here instead of doing the lazy enablement.
4779          */
4780         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4781             test_kvm_facility(vcpu->kvm, 133) &&
4782             gscb->gssm &&
4783             !vcpu->arch.gs_enabled) {
4784                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4785                 vcpu->arch.sie_block->ecb |= ECB_GS;
4786                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4787                 vcpu->arch.gs_enabled = 1;
4788         }
4789         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4790             test_kvm_facility(vcpu->kvm, 82)) {
4791                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4792                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4793         }
4794         if (MACHINE_HAS_GS) {
4795                 preempt_disable();
4796                 __ctl_set_bit(2, 4);
4797                 if (current->thread.gs_cb) {
4798                         vcpu->arch.host_gscb = current->thread.gs_cb;
4799                         save_gs_cb(vcpu->arch.host_gscb);
4800                 }
4801                 if (vcpu->arch.gs_enabled) {
4802                         current->thread.gs_cb = (struct gs_cb *)
4803                                                 &vcpu->run->s.regs.gscb;
4804                         restore_gs_cb(current->thread.gs_cb);
4805                 }
4806                 preempt_enable();
4807         }
4808         /* SIE will load etoken directly from the SDNX and thus from kvm_run */
4809 }
4810
4811 static void sync_regs(struct kvm_vcpu *vcpu)
4812 {
4813         struct kvm_run *kvm_run = vcpu->run;
4814
4815         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4816                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4817         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4818                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4819                 /* some control register changes require a tlb flush */
4820                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4821         }
4822         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4823                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4824                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4825         }
4826         save_access_regs(vcpu->arch.host_acrs);
4827         restore_access_regs(vcpu->run->s.regs.acrs);
4828         /* save host (userspace) fprs/vrs */
4829         save_fpu_regs();
4830         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4831         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4832         if (MACHINE_HAS_VX)
4833                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4834         else
4835                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4836         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4837         if (test_fp_ctl(current->thread.fpu.fpc))
4838                 /* User space provided an invalid FPC, let's clear it */
4839                 current->thread.fpu.fpc = 0;
4840
4841         /* Sync fmt2 only data */
4842         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4843                 sync_regs_fmt2(vcpu);
4844         } else {
4845                 /*
4846                  * In several places we have to modify our internal view to
4847                  * not do things that are disallowed by the ultravisor. For
4848                  * example we must not inject interrupts after specific exits
4849                  * (e.g. 112 prefix page not secure). We do this by turning
4850                  * off the machine check, external and I/O interrupt bits
4851                  * of our PSW copy. To avoid getting validity intercepts, we
4852          * only accept the condition code from userspace.
4853                  */
4854                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4855                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4856                                                    PSW_MASK_CC;
4857         }
4858
4859         kvm_run->kvm_dirty_regs = 0;
4860 }
4861
4862 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4863 {
4864         struct kvm_run *kvm_run = vcpu->run;
4865
4866         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4867         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4868         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4869         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4870         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4871         if (MACHINE_HAS_GS) {
4872                 preempt_disable();
4873                 __ctl_set_bit(2, 4);
4874                 if (vcpu->arch.gs_enabled)
4875                         save_gs_cb(current->thread.gs_cb);
4876                 current->thread.gs_cb = vcpu->arch.host_gscb;
4877                 restore_gs_cb(vcpu->arch.host_gscb);
4878                 if (!vcpu->arch.host_gscb)
4879                         __ctl_clear_bit(2, 4);
4880                 vcpu->arch.host_gscb = NULL;
4881                 preempt_enable();
4882         }
4883         /* SIE will save etoken directly into the SDNX and thus into kvm_run */
4884 }
4885
4886 static void store_regs(struct kvm_vcpu *vcpu)
4887 {
4888         struct kvm_run *kvm_run = vcpu->run;
4889
4890         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4891         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4892         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4893         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4894         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4895         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4896         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4897         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4898         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4899         save_access_regs(vcpu->run->s.regs.acrs);
4900         restore_access_regs(vcpu->arch.host_acrs);
4901         /* Save guest register state */
4902         save_fpu_regs();
4903         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4904         /* Restore will be done lazily at return */
4905         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4906         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4907         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4908                 store_regs_fmt2(vcpu);
4909 }
4910
4911 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4912 {
4913         struct kvm_run *kvm_run = vcpu->run;
4914         int rc;
4915
4916         /*
4917          * Running a VM while dumping always has the potential to
4918          * produce inconsistent dump data. But for PV vcpus a SIE
4919          * entry while dumping could also lead to a fatal validity
4920          * intercept which we absolutely want to avoid.
4921          */
4922         if (vcpu->kvm->arch.pv.dumping)
4923                 return -EINVAL;
4924
4925         if (kvm_run->immediate_exit)
4926                 return -EINTR;
4927
4928         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4929             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4930                 return -EINVAL;
4931
4932         vcpu_load(vcpu);
4933
4934         if (guestdbg_exit_pending(vcpu)) {
4935                 kvm_s390_prepare_debug_exit(vcpu);
4936                 rc = 0;
4937                 goto out;
4938         }
4939
4940         kvm_sigset_activate(vcpu);
4941
4942         /*
4943          * No need to check the return value of vcpu_start: it can only fail
4944          * for protvirt, and protvirt implies user-controlled cpu state.
4945          */
4946         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4947                 kvm_s390_vcpu_start(vcpu);
4948         } else if (is_vcpu_stopped(vcpu)) {
4949                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4950                                    vcpu->vcpu_id);
4951                 rc = -EINVAL;
4952                 goto out;
4953         }
4954
4955         sync_regs(vcpu);
4956         enable_cpu_timer_accounting(vcpu);
4957
4958         might_fault();
4959         rc = __vcpu_run(vcpu);
4960
4961         if (signal_pending(current) && !rc) {
4962                 kvm_run->exit_reason = KVM_EXIT_INTR;
4963                 rc = -EINTR;
4964         }
4965
4966         if (guestdbg_exit_pending(vcpu) && !rc)  {
4967                 kvm_s390_prepare_debug_exit(vcpu);
4968                 rc = 0;
4969         }
4970
4971         if (rc == -EREMOTE) {
4972                 /* userspace support is needed, kvm_run has been prepared */
4973                 rc = 0;
4974         }
4975
4976         disable_cpu_timer_accounting(vcpu);
4977         store_regs(vcpu);
4978
4979         kvm_sigset_deactivate(vcpu);
4980
4981         vcpu->stat.exit_userspace++;
4982 out:
4983         vcpu_put(vcpu);
4984         return rc;
4985 }
4986
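/*
 * Editor's sketch, standard KVM userspace pattern rather than code from
 * this file: the run ioctl above exchanges state through the shared
 * kvm_run structure, which a VMM maps once per vcpu:
 *
 *	int sz = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, sz, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) >= 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;	// hand the intercept to the VMM
 *	}
 *
 * Handling of kvm_valid_regs/kvm_dirty_regs and of errors is omitted.
 */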
4987 /*
4988  * store status at address
4989  * we have two special cases:
4990  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4991  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4992  */
4993 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4994 {
4995         unsigned char archmode = 1;
4996         freg_t fprs[NUM_FPRS];
4997         unsigned int px;
4998         u64 clkcomp, cputm;
4999         int rc;
5000
5001         px = kvm_s390_get_prefix(vcpu);
5002         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5003                 if (write_guest_abs(vcpu, 163, &archmode, 1))
5004                         return -EFAULT;
5005                 gpa = 0;
5006         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5007                 if (write_guest_real(vcpu, 163, &archmode, 1))
5008                         return -EFAULT;
5009                 gpa = px;
5010         } else
5011                 gpa -= __LC_FPREGS_SAVE_AREA;
5012
5013         /* manually convert vector registers if necessary */
5014         if (MACHINE_HAS_VX) {
5015                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5016                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5017                                      fprs, 128);
5018         } else {
5019                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5020                                      vcpu->run->s.regs.fprs, 128);
5021         }
5022         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5023                               vcpu->run->s.regs.gprs, 128);
5024         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5025                               &vcpu->arch.sie_block->gpsw, 16);
5026         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5027                               &px, 4);
5028         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5029                               &vcpu->run->s.regs.fpc, 4);
5030         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5031                               &vcpu->arch.sie_block->todpr, 4);
5032         cputm = kvm_s390_get_cpu_timer(vcpu);
5033         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5034                               &cputm, 8);
5035         clkcomp = vcpu->arch.sie_block->ckc >> 8;
5036         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5037                               &clkcomp, 8);
5038         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5039                               &vcpu->run->s.regs.acrs, 64);
5040         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5041                               &vcpu->arch.sie_block->gcr, 128);
5042         return rc ? -EFAULT : 0;
5043 }
5044
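/*
 * Editor's note: the rebase above (gpa -= __LC_FPREGS_SAVE_AREA) lets all
 * following write_guest_abs() calls reuse the absolute lowcore offsets of
 * the individual save areas, so one code path serves stores to an
 * arbitrary address, to the prefix area and to the NOADDR (0x1200) case.
 */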
5045 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
5046 {
5047         /*
5048          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5049          * switch in the run ioctl. Let's update our copies before we save
5050          * them into the save area.
5051          */
5052         save_fpu_regs();
5053         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
5054         save_access_regs(vcpu->run->s.regs.acrs);
5055
5056         return kvm_s390_store_status_unloaded(vcpu, addr);
5057 }
5058
5059 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5060 {
5061         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5062         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5063 }
5064
5065 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5066 {
5067         unsigned long i;
5068         struct kvm_vcpu *vcpu;
5069
5070         kvm_for_each_vcpu(i, vcpu, kvm) {
5071                 __disable_ibs_on_vcpu(vcpu);
5072         }
5073 }
5074
5075 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5076 {
5077         if (!sclp.has_ibs)
5078                 return;
5079         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5080         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5081 }
5082
5083 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5084 {
5085         int i, online_vcpus, r = 0, started_vcpus = 0;
5086
5087         if (!is_vcpu_stopped(vcpu))
5088                 return 0;
5089
5090         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5091         /* Only one cpu at a time may enter/leave the STOPPED state. */
5092         spin_lock(&vcpu->kvm->arch.start_stop_lock);
5093         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5094
5095         /* Let's tell the UV that we want to change into the operating state */
5096         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5097                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5098                 if (r) {
5099                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5100                         return r;
5101                 }
5102         }
5103
5104         for (i = 0; i < online_vcpus; i++) {
5105                 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5106                         started_vcpus++;
5107         }
5108
5109         if (started_vcpus == 0) {
5110                 /* we're the only active VCPU -> speed it up */
5111                 __enable_ibs_on_vcpu(vcpu);
5112         } else if (started_vcpus == 1) {
5113                 /*
5114                  * As we are starting a second VCPU, we have to disable
5115                  * the IBS facility on all VCPUs to remove potentially
5116                  * outstanding ENABLE requests.
5117                  */
5118                 __disable_ibs_on_all_vcpus(vcpu->kvm);
5119         }
5120
5121         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5122         /*
5123          * The real PSW might have changed due to a RESTART interpreted by the
5124          * ultravisor. We block all interrupts and let the next sie exit
5125          * refresh our view.
5126          */
5127         if (kvm_s390_pv_cpu_is_protected(vcpu))
5128                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5129         /*
5130          * Another VCPU might have used IBS while we were offline.
5131          * Let's play safe and flush the VCPU at startup.
5132          */
5133         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5134         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5135         return 0;
5136 }
5137
5138 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5139 {
5140         int i, online_vcpus, r = 0, started_vcpus = 0;
5141         struct kvm_vcpu *started_vcpu = NULL;
5142
5143         if (is_vcpu_stopped(vcpu))
5144                 return 0;
5145
5146         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5147         /* Only one cpu at a time may enter/leave the STOPPED state. */
5148         spin_lock(&vcpu->kvm->arch.start_stop_lock);
5149         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5150
5151         /* Let's tell the UV that we want to change into the stopped state */
5152         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5153                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5154                 if (r) {
5155                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5156                         return r;
5157                 }
5158         }
5159
5160         /*
5161          * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5162          * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5163          * have been fully processed. This will ensure that the VCPU
5164          * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5165          */
5166         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5167         kvm_s390_clear_stop_irq(vcpu);
5168
5169         __disable_ibs_on_vcpu(vcpu);
5170
5171         for (i = 0; i < online_vcpus; i++) {
5172                 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5173
5174                 if (!is_vcpu_stopped(tmp)) {
5175                         started_vcpus++;
5176                         started_vcpu = tmp;
5177                 }
5178         }
5179
5180         if (started_vcpus == 1) {
5181                 /*
5182                  * As we only have one VCPU left, we want to enable the
5183                  * IBS facility for that VCPU to speed it up.
5184                  */
5185                 __enable_ibs_on_vcpu(started_vcpu);
5186         }
5187
5188         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5189         return 0;
5190 }
5191
5192 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5193                                      struct kvm_enable_cap *cap)
5194 {
5195         int r;
5196
5197         if (cap->flags)
5198                 return -EINVAL;
5199
5200         switch (cap->cap) {
5201         case KVM_CAP_S390_CSS_SUPPORT:
5202                 if (!vcpu->kvm->arch.css_support) {
5203                         vcpu->kvm->arch.css_support = 1;
5204                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5205                         trace_kvm_s390_enable_css(vcpu->kvm);
5206                 }
5207                 r = 0;
5208                 break;
5209         default:
5210                 r = -EINVAL;
5211                 break;
5212         }
5213         return r;
5214 }
5215
5216 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5217                                   struct kvm_s390_mem_op *mop)
5218 {
5219         void __user *uaddr = (void __user *)mop->buf;
5220         void *sida_addr;
5221         int r = 0;
5222
5223         if (mop->flags || !mop->size)
5224                 return -EINVAL;
5225         if (mop->size + mop->sida_offset < mop->size)
5226                 return -EINVAL;
5227         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5228                 return -E2BIG;
5229         if (!kvm_s390_pv_cpu_is_protected(vcpu))
5230                 return -EINVAL;
5231
5232         sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
5233
5234         switch (mop->op) {
5235         case KVM_S390_MEMOP_SIDA_READ:
5236                 if (copy_to_user(uaddr, sida_addr, mop->size))
5237                         r = -EFAULT;
5238
5239                 break;
5240         case KVM_S390_MEMOP_SIDA_WRITE:
5241                 if (copy_from_user(sida_addr, uaddr, mop->size))
5242                         r = -EFAULT;
5243                 break;
5244         }
5245         return r;
5246 }
5247
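/*
 * Editor's note: "mop->size + mop->sida_offset < mop->size" above is the
 * usual unsigned wraparound test; an unsigned sum has overflowed exactly
 * when it ends up smaller than either operand. As a standalone sketch:
 *
 *	static inline bool add_overflows_u64(u64 a, u64 b)
 *	{
 *		return a + b < a;	// true iff a + b wrapped past U64_MAX
 *	}
 */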
5248 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5249                                  struct kvm_s390_mem_op *mop)
5250 {
5251         void __user *uaddr = (void __user *)mop->buf;
5252         void *tmpbuf = NULL;
5253         int r = 0;
5254         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5255                                     | KVM_S390_MEMOP_F_CHECK_ONLY
5256                                     | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5257
5258         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5259                 return -EINVAL;
5260         if (mop->size > MEM_OP_MAX_SIZE)
5261                 return -E2BIG;
5262         if (kvm_s390_pv_cpu_is_protected(vcpu))
5263                 return -EINVAL;
5264         if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5265                 if (access_key_invalid(mop->key))
5266                         return -EINVAL;
5267         } else {
5268                 mop->key = 0;
5269         }
5270         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5271                 tmpbuf = vmalloc(mop->size);
5272                 if (!tmpbuf)
5273                         return -ENOMEM;
5274         }
5275
5276         switch (mop->op) {
5277         case KVM_S390_MEMOP_LOGICAL_READ:
5278                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5279                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5280                                             GACC_FETCH, mop->key);
5281                         break;
5282                 }
5283                 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5284                                         mop->size, mop->key);
5285                 if (r == 0) {
5286                         if (copy_to_user(uaddr, tmpbuf, mop->size))
5287                                 r = -EFAULT;
5288                 }
5289                 break;
5290         case KVM_S390_MEMOP_LOGICAL_WRITE:
5291                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5292                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5293                                             GACC_STORE, mop->key);
5294                         break;
5295                 }
5296                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5297                         r = -EFAULT;
5298                         break;
5299                 }
5300                 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5301                                          mop->size, mop->key);
5302                 break;
5303         }
5304
5305         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5306                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5307
5308         vfree(tmpbuf);
5309         return r;
5310 }
5311
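/*
 * Editor's sketch, hypothetical values, field names per the uapi
 * kvm_s390_mem_op used above: reading 256 bytes from a guest logical
 * address through access register 0 could look like:
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,	// made-up guest logical address
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf = (__u64)(unsigned long)buf,
 *		.ar = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 *
 * Adding KVM_S390_MEMOP_F_CHECK_ONLY to op.flags would verify access only.
 */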
5312 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5313                                      struct kvm_s390_mem_op *mop)
5314 {
5315         int r, srcu_idx;
5316
5317         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5318
5319         switch (mop->op) {
5320         case KVM_S390_MEMOP_LOGICAL_READ:
5321         case KVM_S390_MEMOP_LOGICAL_WRITE:
5322                 r = kvm_s390_vcpu_mem_op(vcpu, mop);
5323                 break;
5324         case KVM_S390_MEMOP_SIDA_READ:
5325         case KVM_S390_MEMOP_SIDA_WRITE:
5326                 /* we are locked against sida going away by the vcpu->mutex */
5327                 r = kvm_s390_vcpu_sida_op(vcpu, mop);
5328                 break;
5329         default:
5330                 r = -EINVAL;
5331         }
5332
5333         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5334         return r;
5335 }
5336
5337 long kvm_arch_vcpu_async_ioctl(struct file *filp,
5338                                unsigned int ioctl, unsigned long arg)
5339 {
5340         struct kvm_vcpu *vcpu = filp->private_data;
5341         void __user *argp = (void __user *)arg;
5342
5343         switch (ioctl) {
5344         case KVM_S390_IRQ: {
5345                 struct kvm_s390_irq s390irq;
5346
5347                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5348                         return -EFAULT;
5349                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
5350         }
5351         case KVM_S390_INTERRUPT: {
5352                 struct kvm_s390_interrupt s390int;
5353                 struct kvm_s390_irq s390irq = {};
5354
5355                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
5356                         return -EFAULT;
5357                 if (s390int_to_s390irq(&s390int, &s390irq))
5358                         return -EINVAL;
5359                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
5360         }
5361         }
5362         return -ENOIOCTLCMD;
5363 }
5364
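/*
 * Editor's sketch, hypothetical: because these two ioctls are handled on
 * the async path above, they do not take the vcpu mutex. Injecting, say,
 * an emergency signal could look like:
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = 0,	// made-up sending cpu address
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq) < 0)
 *		perror("KVM_S390_IRQ");
 */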
5365 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5366                                         struct kvm_pv_cmd *cmd)
5367 {
5368         struct kvm_s390_pv_dmp dmp;
5369         void *data;
5370         int ret;
5371
5372         /* Dump initialization is a prerequisite */
5373         if (!vcpu->kvm->arch.pv.dumping)
5374                 return -EINVAL;
5375
5376         if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5377                 return -EFAULT;
5378
5379         /* We only handle this subcmd right now */
5380         if (dmp.subcmd != KVM_PV_DUMP_CPU)
5381                 return -EINVAL;
5382
5383         /* The CPU dump length matches the cpu storage donated at CPU creation. */
5384         if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5385                 return -EINVAL;
5386
5387         data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5388         if (!data)
5389                 return -ENOMEM;
5390
5391         ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5392
5393         VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5394                    vcpu->vcpu_id, cmd->rc, cmd->rrc);
5395
5396         if (ret)
5397                 ret = -EINVAL;
5398
5399         /* On success copy over the dump data */
5400         if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5401                 ret = -EFAULT;
5402
5403         kvfree(data);
5404         return ret;
5405 }
5406
5407 long kvm_arch_vcpu_ioctl(struct file *filp,
5408                          unsigned int ioctl, unsigned long arg)
5409 {
5410         struct kvm_vcpu *vcpu = filp->private_data;
5411         void __user *argp = (void __user *)arg;
5412         int idx;
5413         long r;
5414         u16 rc, rrc;
5415
5416         vcpu_load(vcpu);
5417
5418         switch (ioctl) {
5419         case KVM_S390_STORE_STATUS:
5420                 idx = srcu_read_lock(&vcpu->kvm->srcu);
5421                 r = kvm_s390_store_status_unloaded(vcpu, arg);
5422                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5423                 break;
5424         case KVM_S390_SET_INITIAL_PSW: {
5425                 psw_t psw;
5426
5427                 r = -EFAULT;
5428                 if (copy_from_user(&psw, argp, sizeof(psw)))
5429                         break;
5430                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5431                 break;
5432         }
5433         case KVM_S390_CLEAR_RESET:
5434                 r = 0;
5435                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5436                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5437                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5438                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5439                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5440                                    rc, rrc);
5441                 }
5442                 break;
5443         case KVM_S390_INITIAL_RESET:
5444                 r = 0;
5445                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5446                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5447                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5448                                           UVC_CMD_CPU_RESET_INITIAL,
5449                                           &rc, &rrc);
5450                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5451                                    rc, rrc);
5452                 }
5453                 break;
5454         case KVM_S390_NORMAL_RESET:
5455                 r = 0;
5456                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5457                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5458                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5459                                           UVC_CMD_CPU_RESET, &rc, &rrc);
5460                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5461                                    rc, rrc);
5462                 }
5463                 break;
5464         case KVM_SET_ONE_REG:
5465         case KVM_GET_ONE_REG: {
5466                 struct kvm_one_reg reg;
5467                 r = -EINVAL;
5468                 if (kvm_s390_pv_cpu_is_protected(vcpu))
5469                         break;
5470                 r = -EFAULT;
5471                 if (copy_from_user(&reg, argp, sizeof(reg)))
5472                         break;
5473                 if (ioctl == KVM_SET_ONE_REG)
5474                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5475                 else
5476                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5477                 break;
5478         }
5479 #ifdef CONFIG_KVM_S390_UCONTROL
5480         case KVM_S390_UCAS_MAP: {
5481                 struct kvm_s390_ucas_mapping ucasmap;
5482
5483                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5484                         r = -EFAULT;
5485                         break;
5486                 }
5487
5488                 if (!kvm_is_ucontrol(vcpu->kvm)) {
5489                         r = -EINVAL;
5490                         break;
5491                 }
5492
5493                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5494                                      ucasmap.vcpu_addr, ucasmap.length);
5495                 break;
5496         }
5497         case KVM_S390_UCAS_UNMAP: {
5498                 struct kvm_s390_ucas_mapping ucasmap;
5499
5500                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5501                         r = -EFAULT;
5502                         break;
5503                 }
5504
5505                 if (!kvm_is_ucontrol(vcpu->kvm)) {
5506                         r = -EINVAL;
5507                         break;
5508                 }
5509
5510                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5511                         ucasmap.length);
5512                 break;
5513         }
5514 #endif
5515         case KVM_S390_VCPU_FAULT: {
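                     /*
                      * Fault in the page backing the given guest address in
                      * the guest address space (gmap).
                      */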
5516                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
5517                 break;
5518         }
5519         case KVM_ENABLE_CAP:
5520         {
5521                 struct kvm_enable_cap cap;
5522                 r = -EFAULT;
5523                 if (copy_from_user(&cap, argp, sizeof(cap)))
5524                         break;
5525                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5526                 break;
5527         }
5528         case KVM_S390_MEM_OP: {
5529                 struct kvm_s390_mem_op mem_op;
5530
5531                 r = -EFAULT;
5532                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)))
5533                         break;
5534                 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5535                 break;
5536         }
5537         case KVM_S390_SET_IRQ_STATE: {
5538                 struct kvm_s390_irq_state irq_state;
5539
5540                 r = -EFAULT;
5541                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5542                         break;
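                     /*
                      * The buffer must hold a whole number of kvm_s390_irq
                      * structures and may not exceed the worst case of
                      * KVM_MAX_VCPUS + LOCAL_IRQS pending irqs.
                      */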
5543                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5544                     irq_state.len == 0 ||
5545                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5546                         r = -EINVAL;
5547                         break;
5548                 }
5549                 /* do not use irq_state.flags, as that would break old QEMUs */
5550                 r = kvm_s390_set_irq_state(vcpu,
5551                                            (void __user *) irq_state.buf,
5552                                            irq_state.len);
5553                 break;
5554         }
5555         case KVM_S390_GET_IRQ_STATE: {
5556                 struct kvm_s390_irq_state irq_state;
5557
5558                 r = -EFAULT;
5559                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5560                         break;
5561                 if (irq_state.len == 0) {
5562                         r = -EINVAL;
5563                         break;
5564                 }
5565                 /* do not use irq_state.flags, as that would break old QEMUs */
5566                 r = kvm_s390_get_irq_state(vcpu,
5567                                            (__u8 __user *) irq_state.buf,
5568                                            irq_state.len);
5569                 break;
5570         }
5571         case KVM_S390_PV_CPU_COMMAND: {
5572                 struct kvm_pv_cmd cmd;
5573
5574                 r = -EINVAL;
5575                 if (!is_prot_virt_host())
5576                         break;
5577
5578                 r = -EFAULT;
5579                 if (copy_from_user(&cmd, argp, sizeof(cmd)))
5580                         break;
5581
5582                 r = -EINVAL;
5583                 if (cmd.flags)
5584                         break;
5585
5586                 /* We only handle this cmd right now */
5587                 if (cmd.cmd != KVM_PV_DUMP)
5588                         break;
5589
5590                 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5591
5592                 /* Always copy over UV rc / rrc data */
5593                 if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5594                                  sizeof(cmd.rc) + sizeof(cmd.rrc)))
5595                         r = -EFAULT;
5596                 break;
5597         }
5598         default:
5599                 r = -ENOTTY;
5600         }
5601
5602         vcpu_put(vcpu);
5603         return r;
5604 }
5605
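     /*
      * For user-controlled VMs, mmap()ing the vcpu fd at
      * KVM_S390_SIE_PAGE_OFFSET exposes the SIE control block to user space;
      * all other accesses get SIGBUS.
      */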
5606 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5607 {
5608 #ifdef CONFIG_KVM_S390_UCONTROL
5609         if (vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET &&
5610             kvm_is_ucontrol(vcpu->kvm)) {
5611                 vmf->page = virt_to_page(vcpu->arch.sie_block);
5612                 get_page(vmf->page);
5613                 return 0;
5614         }
5615 #endif
5616         return VM_FAULT_SIGBUS;
5617 }
5618
5619 bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
5620 {
5621         return true;
5622 }
5623
5624 /* Section: memory related */
5625 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5626                                    const struct kvm_memory_slot *old,
5627                                    struct kvm_memory_slot *new,
5628                                    enum kvm_mr_change change)
5629 {
5630         gpa_t size;
5631
5632         /* When we are protected, we should not change the memory slots */
5633         if (kvm_s390_pv_get_handle(kvm))
5634                 return -EINVAL;
5635
5636         if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5637                 return 0;
5638
5639         /* A few sanity checks: memory slots must start and end on a
5640            segment boundary (1 MB). The backing memory in userland may be
5641            fragmented across several different VMAs, and it is fine to
5642            mmap() and munmap() within this slot at any time after this call */
5643
5644         if (new->userspace_addr & 0xffffful)
5645                 return -EINVAL;
5646
5647         size = new->npages * PAGE_SIZE;
5648         if (size & 0xffffful)
5649                 return -EINVAL;
5650
5651         if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5652                 return -EINVAL;
5653
5654         return 0;
5655 }
5656
5657 void kvm_arch_commit_memory_region(struct kvm *kvm,
5658                                 struct kvm_memory_slot *old,
5659                                 const struct kvm_memory_slot *new,
5660                                 enum kvm_mr_change change)
5661 {
5662         int rc = 0;
5663
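             /*
              * A MOVE is realized as an unmap of the old range followed by a
              * map of the new one (note the fallthrough).
              */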
5664         switch (change) {
5665         case KVM_MR_DELETE:
5666                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5667                                         old->npages * PAGE_SIZE);
5668                 break;
5669         case KVM_MR_MOVE:
5670                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5671                                         old->npages * PAGE_SIZE);
5672                 if (rc)
5673                         break;
5674                 fallthrough;
5675         case KVM_MR_CREATE:
5676                 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5677                                       new->base_gfn * PAGE_SIZE,
5678                                       new->npages * PAGE_SIZE);
5679                 break;
5680         case KVM_MR_FLAGS_ONLY:
5681                 break;
5682         default:
5683                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5684         }
5685         if (rc)
5686                 pr_warn("failed to commit memory region\n");
5687         return;
5688 }
5689
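     /*
      * Extract the 2-bit field for facility doubleword i from sclp.hmfai and
      * use it to blank out 16 facility bits per increment, i.e. mask off
      * facilities that need hypervisor support.
      */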
5690 static inline unsigned long nonhyp_mask(int i)
5691 {
5692         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5693
5694         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5695 }
5696
5697 static int __init kvm_s390_init(void)
5698 {
5699         int i;
5700
5701         if (!sclp.has_sief2) {
5702                 pr_info("SIE is not available\n");
5703                 return -ENODEV;
5704         }
5705
5706         if (nested && hpage) {
5707                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5708                 return -EINVAL;
5709         }
5710
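             /*
              * Populate the base facility list from the host's STFLE list,
              * masked to what guests can use without hypervisor support
              * (see nonhyp_mask()).
              */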
5711         for (i = 0; i < 16; i++)
5712                 kvm_s390_fac_base[i] |=
5713                         stfle_fac_list[i] & nonhyp_mask(i);
5714
5715         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5716 }
5717
5718 static void __exit kvm_s390_exit(void)
5719 {
5720         kvm_exit();
5721 }
5722
5723 module_init(kvm_s390_init);
5724 module_exit(kvm_s390_exit);
5725
5726 /*
5727  * Enable autoloading of the kvm module.
5728  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5729  * since x86 takes a different approach.
5730  */
5731 #include <linux/miscdevice.h>
5732 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5733 MODULE_ALIAS("devname:kvm");