// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2017
 *
 *    Author(s): Carsten Otte <[email protected]>
 *               Christian Borntraeger <[email protected]>
 *               Heiko Carstens <[email protected]>
 *               Christian Ehrhardt <[email protected]>
 *               Jason J. Herne <[email protected]>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

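/*
 * One debugfs file per counter: each entry pairs the file name with the
 * offset of the counter inside struct kvm_vcpu. The generic KVM code
 * creates the files under the kvm debugfs directory.
 */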
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};

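/*
 * Matches the 16-byte value stored by STORE CLOCK EXTENDED (STCKE):
 * one epoch-index byte, the next eight bytes of the TOD clock, and
 * seven trailing bytes (low-order clock bits and the programmable
 * field) that KVM does not use.
 */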
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
        return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm->arch.epoch -= *delta;
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        vcpu->arch.sie_block->epoch -= *delta;
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                vcpu->arch.vsie_block->epoch -= *delta;
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

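/*
 * PERFORM LOCKED OPERATION with the test bit (0x100) set in the
 * function code does not perform the operation itself; it only sets
 * condition code 0 if the function code in the low byte is installed.
 */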
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                debug_unregister(kvm_s390_dbf);
                return -ENOMEM;
        }

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

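/*
 * Hypothetical userspace sketch: extensions are queried with the
 * KVM_CHECK_EXTENSION ioctl, either on /dev/kvm or on a VM fd, e.g.
 *
 *	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP) > 0)
 *		; // supported; the return value is the maximum transfer size
 */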
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
                r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        default:
                r = 0;
        }
        return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
{
        gfn_t cur_gfn, last_gfn;
        unsigned long address;
        struct gmap *gmap = kvm->arch.gmap;

        /* Loop over all guest pages; last_gfn is the first gfn past the slot */
        last_gfn = memslot->base_gfn + memslot->npages;
        for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);

                if (test_and_clear_guest_dirty(gmap->mm, address))
                        mark_page_dirty(kvm, cur_gfn);
                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

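/*
 * Hypothetical userspace sketch: VM-wide capabilities are switched on
 * with the KVM_ENABLE_CAP ioctl on the VM file descriptor, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */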
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus)) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_vcpu *vcpu;
        int i;

        if (!test_kvm_facility(kvm, 76))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                exit_sie(vcpu);
        }
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
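/*
 * While migration mode is on, the ESSA intercept handler keeps the
 * pgste_bitmap up to date, so that userspace can pull the CMMA state
 * of all dirty pages via KVM_S390_GET_CMMA_BITS.
 */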
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_s390_migration_state *mgs;
        struct kvm_memory_slot *ms;
        /* should be the only one */
        struct kvm_memslots *slots;
        unsigned long ram_pages;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_state)
                return 0;

        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
        if (!mgs)
                return -ENOMEM;
        kvm->arch.migration_state = mgs;

        if (kvm->arch.use_cmma) {
                /*
                 * Get the first slot. They are reverse sorted by base_gfn, so
                 * the first slot is also the one at the end of the address
                 * space. We have verified above that at least one slot is
                 * present.
                 */
                ms = slots->memslots;
                /* round up so we only use full longs */
                ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
                /* allocate enough bytes to store all the bits */
                mgs->pgste_bitmap = vmalloc(ram_pages / 8);
                if (!mgs->pgste_bitmap) {
                        kfree(mgs);
                        kvm->arch.migration_state = NULL;
                        return -ENOMEM;
                }

                mgs->bitmap_size = ram_pages;
                atomic64_set(&mgs->dirty_pages, ram_pages);
                /* mark all the pages in active slots as dirty */
                for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                        ms = slots->memslots + slotnr;
                        bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
                }

                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        }
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        struct kvm_s390_migration_state *mgs;

        /* migration mode already disabled */
        if (!kvm->arch.migration_state)
                return 0;
        mgs = kvm->arch.migration_state;
        kvm->arch.migration_state = NULL;

        if (kvm->arch.use_cmma) {
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
                /* We have to wait for the essa emulation to finish */
                synchronize_srcu(&kvm->srcu);
                vfree(mgs->pgste_bitmap);
        }
        kfree(mgs);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = (kvm->arch.migration_state != NULL);

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (test_kvm_facility(kvm, 139))
                kvm_s390_set_tod_clock_ext(kvm, &gtod);
        else if (gtod.epoch_idx == 0)
                kvm_s390_set_tod_clock(kvm, gtod.tod);
        else
                return -EINVAL;

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
                                        struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

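        /*
         * Adding a negative epoch offset may wrap the 64-bit sum around;
         * in that case carry the overflow into the epoch index.
         */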
        if (gtod->tod < htod.tod)
                gtod->epoch_idx += 1;

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));

        if (test_kvm_facility(kvm, 139))
                kvm_s390_get_tod_clock_ext(kvm, &gtod);
        else
                gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;
        int ret = -EBUSY;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (!atomic_read(&kvm->online_vcpus)) {
                bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
                            KVM_S390_VM_CPU_FEAT_NR_BITS);
                ret = 0;
        }
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                 kvm->arch.model.fac_list[0],
                 kvm->arch.model.fac_list[1],
                 kvm->arch.model.fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

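/*
 * The machine attribute reports the host view: fac_mask is the limit
 * KVM applies when offering facilities to guests, while fac_list is
 * taken straight from the host's STFLE result in the lowcore.
 */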
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               sizeof(S390_lowcore.stfle_fac_list));
        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_mask[0],
                 mach->fac_mask[1],
                 mach->fac_mask[2]);
        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_list[0],
                 mach->fac_list[1],
                 mach->fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat,
                    kvm_s390_available_cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
                                        struct kvm_device_attr *attr)
{
        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
            sizeof(struct kvm_s390_vm_cpu_subfunc)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_get_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_FEAT:
                ret = kvm_s390_get_machine_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_get_processor_subfunc(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
        }
        return ret;
}

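/*
 * Hypothetical userspace sketch: all attribute groups below are reached
 * through the KVM_SET_DEVICE_ATTR / KVM_GET_DEVICE_ATTR /
 * KVM_HAS_DEVICE_ATTR ioctls on the VM file descriptor, e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */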
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_set_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_get_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                        ret = sclp.has_cmma ? 0 : -ENXIO;
                        break;
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CPU_MODEL:
                switch (attr->attr) {
                case KVM_S390_VM_CPU_PROCESSOR:
                case KVM_S390_VM_CPU_MACHINE:
                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                        ret = 0;
                        break;
                /* configuring subfunctions is not supported yet */
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CRYPTO:
                switch (attr->attr) {
                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_MIGRATION:
                ret = 0;
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

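/*
 * Hypothetical userspace sketch: storage keys are read and written in
 * bulk with the KVM_S390_GET_SKEYS / KVM_S390_SET_SKEYS ioctls, e.g.
 *
 *	uint8_t buf[64];
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 */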
1394 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1395 {
1396         uint8_t *keys;
1397         uint64_t hva;
1398         int srcu_idx, i, r = 0;
1399
1400         if (args->flags != 0)
1401                 return -EINVAL;
1402
1403         /* Is this guest using storage keys? */
1404         if (!mm_use_skey(current->mm))
1405                 return KVM_S390_GET_SKEYS_NONE;
1406
1407         /* Enforce sane limit on memory allocation */
1408         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1409                 return -EINVAL;
1410
1411         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1412         if (!keys)
1413                 return -ENOMEM;
1414
1415         down_read(&current->mm->mmap_sem);
1416         srcu_idx = srcu_read_lock(&kvm->srcu);
1417         for (i = 0; i < args->count; i++) {
1418                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1419                 if (kvm_is_error_hva(hva)) {
1420                         r = -EFAULT;
1421                         break;
1422                 }
1423
1424                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1425                 if (r)
1426                         break;
1427         }
1428         srcu_read_unlock(&kvm->srcu, srcu_idx);
1429         up_read(&current->mm->mmap_sem);
1430
1431         if (!r) {
1432                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1433                                  sizeof(uint8_t) * args->count);
1434                 if (r)
1435                         r = -EFAULT;
1436         }
1437
1438         kvfree(keys);
1439         return r;
1440 }
1441
1442 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1443 {
1444         uint8_t *keys;
1445         uint64_t hva;
1446         int srcu_idx, i, r = 0;
1447
1448         if (args->flags != 0)
1449                 return -EINVAL;
1450
1451         /* Enforce sane limit on memory allocation */
1452         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1453                 return -EINVAL;
1454
1455         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1456         if (!keys)
1457                 return -ENOMEM;
1458
1459         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1460                            sizeof(uint8_t) * args->count);
1461         if (r) {
1462                 r = -EFAULT;
1463                 goto out;
1464         }
1465
1466         /* Enable storage key handling for the guest */
1467         r = s390_enable_skey();
1468         if (r)
1469                 goto out;
1470
1471         down_read(&current->mm->mmap_sem);
1472         srcu_idx = srcu_read_lock(&kvm->srcu);
1473         for (i = 0; i < args->count; i++) {
1474                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1475                 if (kvm_is_error_hva(hva)) {
1476                         r = -EFAULT;
1477                         break;
1478                 }
1479
1480                 /* The lowest-order bit of a storage key is reserved, must be 0 */
1481                 if (keys[i] & 0x01) {
1482                         r = -EINVAL;
1483                         break;
1484                 }
1485
1486                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1487                 if (r)
1488                         break;
1489         }
1490         srcu_read_unlock(&kvm->srcu, srcu_idx);
1491         up_read(&current->mm->mmap_sem);
1492 out:
1493         kvfree(keys);
1494         return r;
1495 }
1496
1497 /*
1498  * Each block of the migration stream is prefixed with its base address and
1499  * length, so sending a run of clean data inside a block is cheaper than
1500  * starting a new block, as long as the run is shorter than two longs.
1501  */
1502 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1503 /* use the same upper bound as for the storage keys, for consistency */
1504 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
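/*
 * A worked example of the trade-off above, on a 64-bit host:
 * KVM_S390_MAX_BIT_DISTANCE is 2 * 8 = 16 bytes, the size of the two-long
 * block header. Since one attribute byte covers one page, padding a block
 * with up to 16 clean bytes never costs more than the header of the new
 * block that would otherwise have to be started.
 */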
1505
1506 /*
1507  * This function searches for the next page with dirty CMMA attributes, and
1508  * saves the attributes in the buffer up to either the end of the buffer or
1509  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1510  * no trailing clean bytes are saved.
1511  * If no dirty bits were found, or if CMMA was not enabled or used, the
1512  * length reported in the output structure is 0.
1513  */
1514 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1515                                   struct kvm_s390_cmma_log *args)
1516 {
1517         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1518         unsigned long bufsize, hva, pgstev, i, next, cur;
1519         int srcu_idx, peek, r = 0, rr;
1520         u8 *res;
1521
1522         cur = args->start_gfn;
1523         i = next = pgstev = 0;
1524
1525         if (unlikely(!kvm->arch.use_cmma))
1526                 return -ENXIO;
1527         /* Invalid/unsupported flags were specified */
1528         if (args->flags & ~KVM_S390_CMMA_PEEK)
1529                 return -EINVAL;
1530         /* Querying without the peek flag requires migration mode to be on */
1531         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1532         if (!peek && !s)
1533                 return -EINVAL;
1534         /* CMMA is disabled or was not used, or the buffer has length zero */
1535         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1536         if (!bufsize || !kvm->mm->context.use_cmma) {
1537                 memset(args, 0, sizeof(*args));
1538                 return 0;
1539         }
1540
1541         if (!peek) {
1542                 /* We are not peeking, and there are no dirty pages */
1543                 if (!atomic64_read(&s->dirty_pages)) {
1544                         memset(args, 0, sizeof(*args));
1545                         return 0;
1546                 }
1547                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1548                                     args->start_gfn);
1549                 if (cur >= s->bitmap_size)      /* nothing found, wrap around to bit 0 */
1550                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1551                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1552                         memset(args, 0, sizeof(*args));
1553                         return 0;
1554                 }
1555                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1556         }
1557
1558         res = vmalloc(bufsize);
1559         if (!res)
1560                 return -ENOMEM;
1561
1562         args->start_gfn = cur;
1563
1564         down_read(&kvm->mm->mmap_sem);
1565         srcu_idx = srcu_read_lock(&kvm->srcu);
1566         while (i < bufsize) {
1567                 hva = gfn_to_hva(kvm, cur);
1568                 if (kvm_is_error_hva(hva)) {
1569                         r = -EFAULT;
1570                         break;
1571                 }
1572                 /* decrement only if we actually flipped the bit to 0 */
1573                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1574                         atomic64_dec(&s->dirty_pages);
1575                 r = get_pgste(kvm->mm, hva, &pgstev);
1576                 if (r < 0)
1577                         pgstev = 0;
1578                 /* save the usage state and the NODAT bit (mask 0x43) */
1579                 res[i++] = (pgstev >> 24) & 0x43;
1580                 /*
1581                  * If the next dirty bit is too far away, stop.
1582                  * If we have reached the previous "next", look up its successor.
1583                  */
1584                 if (!peek) {
1585                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1586                                 break;
1587                         if (cur == next)
1588                                 next = find_next_bit(s->pgste_bitmap,
1589                                                      s->bitmap_size, cur + 1);
1590                         /* reached the end of the bitmap or of the buffer, stop */
1591                         if ((next >= s->bitmap_size) ||
1592                             (next >= args->start_gfn + bufsize))
1593                                 break;
1594                 }
1595                 cur++;
1596         }
1597         srcu_read_unlock(&kvm->srcu, srcu_idx);
1598         up_read(&kvm->mm->mmap_sem);
1599         args->count = i;
1600         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1601
1602         rr = copy_to_user((void __user *)args->values, res, args->count);
1603         if (rr)
1604                 r = -EFAULT;
1605
1606         vfree(res);
1607         return r;
1608 }
1609
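/*
 * Illustrative userspace sketch (not part of this file; "vm_fd" is an
 * assumed open KVM VM file descriptor): peeking at the CMMA values of the
 * first pages, which works even outside of migration mode. On success,
 * log.count holds the number of attribute bytes that were stored.
 *
 *	uint8_t buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (uint64_t)(uintptr_t)buf,
 *	};
 *	int r = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 */
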
1610 /*
1611  * This function sets the CMMA attributes for the given pages. If the input
1612  * buffer has zero length, no action is taken, otherwise the attributes are
1613  * set and the mm->context.use_cmma flag is set.
1614  */
1615 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1616                                   const struct kvm_s390_cmma_log *args)
1617 {
1618         unsigned long hva, mask, pgstev, i;
1619         uint8_t *bits;
1620         int srcu_idx, r = 0;
1621
1622         mask = args->mask;
1623
1624         if (!kvm->arch.use_cmma)
1625                 return -ENXIO;
1626         /* invalid/unsupported flags */
1627         if (args->flags != 0)
1628                 return -EINVAL;
1629         /* Enforce sane limit on memory allocation */
1630         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1631                 return -EINVAL;
1632         /* Nothing to do */
1633         if (args->count == 0)
1634                 return 0;
1635
1636         bits = vmalloc(sizeof(*bits) * args->count);
1637         if (!bits)
1638                 return -ENOMEM;
1639
1640         r = copy_from_user(bits, (void __user *)args->values, args->count);
1641         if (r) {
1642                 r = -EFAULT;
1643                 goto out;
1644         }
1645
1646         down_read(&kvm->mm->mmap_sem);
1647         srcu_idx = srcu_read_lock(&kvm->srcu);
1648         for (i = 0; i < args->count; i++) {
1649                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1650                 if (kvm_is_error_hva(hva)) {
1651                         r = -EFAULT;
1652                         break;
1653                 }
1654
1655                 pgstev = bits[i];
1656                 pgstev = pgstev << 24;
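                /* only the usage state and the NODAT bit may be changed */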
1657                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1658                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1659         }
1660         srcu_read_unlock(&kvm->srcu, srcu_idx);
1661         up_read(&kvm->mm->mmap_sem);
1662
1663         if (!kvm->mm->context.use_cmma) {
1664                 down_write(&kvm->mm->mmap_sem);
1665                 kvm->mm->context.use_cmma = 1;
1666                 up_write(&kvm->mm->mmap_sem);
1667         }
1668 out:
1669         vfree(bits);
1670         return r;
1671 }
1672
1673 long kvm_arch_vm_ioctl(struct file *filp,
1674                        unsigned int ioctl, unsigned long arg)
1675 {
1676         struct kvm *kvm = filp->private_data;
1677         void __user *argp = (void __user *)arg;
1678         struct kvm_device_attr attr;
1679         int r;
1680
1681         switch (ioctl) {
1682         case KVM_S390_INTERRUPT: {
1683                 struct kvm_s390_interrupt s390int;
1684
1685                 r = -EFAULT;
1686                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1687                         break;
1688                 r = kvm_s390_inject_vm(kvm, &s390int);
1689                 break;
1690         }
1691         case KVM_ENABLE_CAP: {
1692                 struct kvm_enable_cap cap;
1693                 r = -EFAULT;
1694                 if (copy_from_user(&cap, argp, sizeof(cap)))
1695                         break;
1696                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1697                 break;
1698         }
1699         case KVM_CREATE_IRQCHIP: {
1700                 struct kvm_irq_routing_entry routing;
1701
1702                 r = -EINVAL;
1703                 if (kvm->arch.use_irqchip) {
1704                         /* Set up dummy routing. */
1705                         memset(&routing, 0, sizeof(routing));
1706                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1707                 }
1708                 break;
1709         }
1710         case KVM_SET_DEVICE_ATTR: {
1711                 r = -EFAULT;
1712                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1713                         break;
1714                 r = kvm_s390_vm_set_attr(kvm, &attr);
1715                 break;
1716         }
1717         case KVM_GET_DEVICE_ATTR: {
1718                 r = -EFAULT;
1719                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1720                         break;
1721                 r = kvm_s390_vm_get_attr(kvm, &attr);
1722                 break;
1723         }
1724         case KVM_HAS_DEVICE_ATTR: {
1725                 r = -EFAULT;
1726                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1727                         break;
1728                 r = kvm_s390_vm_has_attr(kvm, &attr);
1729                 break;
1730         }
1731         case KVM_S390_GET_SKEYS: {
1732                 struct kvm_s390_skeys args;
1733
1734                 r = -EFAULT;
1735                 if (copy_from_user(&args, argp,
1736                                    sizeof(struct kvm_s390_skeys)))
1737                         break;
1738                 r = kvm_s390_get_skeys(kvm, &args);
1739                 break;
1740         }
1741         case KVM_S390_SET_SKEYS: {
1742                 struct kvm_s390_skeys args;
1743
1744                 r = -EFAULT;
1745                 if (copy_from_user(&args, argp,
1746                                    sizeof(struct kvm_s390_skeys)))
1747                         break;
1748                 r = kvm_s390_set_skeys(kvm, &args);
1749                 break;
1750         }
1751         case KVM_S390_GET_CMMA_BITS: {
1752                 struct kvm_s390_cmma_log args;
1753
1754                 r = -EFAULT;
1755                 if (copy_from_user(&args, argp, sizeof(args)))
1756                         break;
1757                 mutex_lock(&kvm->slots_lock);
1758                 r = kvm_s390_get_cmma_bits(kvm, &args);
1759                 mutex_unlock(&kvm->slots_lock);
1760                 if (!r) {
1761                         r = copy_to_user(argp, &args, sizeof(args));
1762                         if (r)
1763                                 r = -EFAULT;
1764                 }
1765                 break;
1766         }
1767         case KVM_S390_SET_CMMA_BITS: {
1768                 struct kvm_s390_cmma_log args;
1769
1770                 r = -EFAULT;
1771                 if (copy_from_user(&args, argp, sizeof(args)))
1772                         break;
1773                 mutex_lock(&kvm->slots_lock);
1774                 r = kvm_s390_set_cmma_bits(kvm, &args);
1775                 mutex_unlock(&kvm->slots_lock);
1776                 break;
1777         }
1778         default:
1779                 r = -ENOTTY;
1780         }
1781
1782         return r;
1783 }
1784
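/*
 * Query the AP (adjunct processor) configuration. Function code
 * 0x04000000 in r0 selects the QCI subfunction and r2 holds the address
 * of the 128 byte info block; the PQAP opcode is emitted as
 * ".long 0xb2af0000", presumably so no assembler support for the
 * mnemonic is needed. Returns the condition code of the instruction.
 */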
1785 static int kvm_s390_query_ap_config(u8 *config)
1786 {
1787         u32 fcn_code = 0x04000000UL;
1788         u32 cc = 0;
1789
1790         memset(config, 0, 128);
1791         asm volatile(
1792                 "lgr 0,%1\n"
1793                 "lgr 2,%2\n"
1794                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1795                 "0: ipm %0\n"
1796                 "srl %0,28\n"
1797                 "1:\n"
1798                 EX_TABLE(0b, 1b)
1799                 : "+r" (cc)
1800                 : "r" (fcn_code), "r" (config)
1801                 : "cc", "0", "2", "memory"
1802         );
1803
1804         return cc;
1805 }
1806
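/*
 * Test whether APXA (AP extended addressing) is available: facility 12
 * indicates that the QCI subfunction may be used, and the code below
 * treats bit 0x40 of the first info byte as the APXA indication.
 */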
1807 static int kvm_s390_apxa_installed(void)
1808 {
1809         u8 config[128];
1810         int cc;
1811
1812         if (test_facility(12)) {
1813                 cc = kvm_s390_query_ap_config(config);
1814
1815                 if (cc)
1816                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1817                 else
1818                         return config[0] & 0x40;
1819         }
1820
1821         return 0;
1822 }
1823
1824 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1825 {
1826         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1827
1828         if (kvm_s390_apxa_installed())
1829                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1830         else
1831                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1832 }
1833
1834 static u64 kvm_s390_get_initial_cpuid(void)
1835 {
1836         struct cpuid cpuid;
1837
1838         get_cpu_id(&cpuid);
1839         cpuid.version = 0xff;
1840         return *((u64 *) &cpuid);
1841 }
1842
1843 static void kvm_s390_crypto_init(struct kvm *kvm)
1844 {
1845         if (!test_kvm_facility(kvm, 76))
1846                 return;
1847
1848         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1849         kvm_s390_set_crycb_format(kvm);
1850
1851         /* Enable AES/DEA protected key functions by default */
1852         kvm->arch.crypto.aes_kw = 1;
1853         kvm->arch.crypto.dea_kw = 1;
1854         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1855                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1856         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1857                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1858 }
1859
1860 static void sca_dispose(struct kvm *kvm)
1861 {
1862         if (kvm->arch.use_esca)
1863                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1864         else
1865                 free_page((unsigned long)(kvm->arch.sca));
1866         kvm->arch.sca = NULL;
1867 }
1868
1869 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1870 {
1871         gfp_t alloc_flags = GFP_KERNEL;
1872         int i, rc;
1873         char debug_name[16];
1874         static unsigned long sca_offset;
1875
1876         rc = -EINVAL;
1877 #ifdef CONFIG_KVM_S390_UCONTROL
1878         if (type & ~KVM_VM_S390_UCONTROL)
1879                 goto out_err;
1880         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1881                 goto out_err;
1882 #else
1883         if (type)
1884                 goto out_err;
1885 #endif
1886
1887         rc = s390_enable_sie();
1888         if (rc)
1889                 goto out_err;
1890
1891         rc = -ENOMEM;
1892
1893         kvm->arch.use_esca = 0; /* start with basic SCA */
1894         if (!sclp.has_64bscao)
1895                 alloc_flags |= GFP_DMA;
1896         rwlock_init(&kvm->arch.sca_lock);
1897         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1898         if (!kvm->arch.sca)
1899                 goto out_err;
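        /*
         * Stagger the SCA within its page in 16 byte steps, presumably to
         * spread the SCAs of different VMs over different cache lines.
         */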
1900         spin_lock(&kvm_lock);
1901         sca_offset += 16;
1902         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1903                 sca_offset = 0;
1904         kvm->arch.sca = (struct bsca_block *)
1905                         ((char *) kvm->arch.sca + sca_offset);
1906         spin_unlock(&kvm_lock);
1907
1908         sprintf(debug_name, "kvm-%u", current->pid);
1909
1910         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1911         if (!kvm->arch.dbf)
1912                 goto out_err;
1913
1914         kvm->arch.sie_page2 =
1915              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1916         if (!kvm->arch.sie_page2)
1917                 goto out_err;
1918
1919         /* Populate the facility mask initially. */
1920         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1921                sizeof(S390_lowcore.stfle_fac_list));
1922         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1923                 if (i < kvm_s390_fac_list_mask_size())
1924                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1925                 else
1926                         kvm->arch.model.fac_mask[i] = 0UL;
1927         }
1928
1929         /* Populate the facility list initially. */
1930         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1931         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1932                S390_ARCH_FAC_LIST_SIZE_BYTE);
1933
1934         /* we are always in CZAM mode - even on pre-z14 machines */
1935         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1936         set_kvm_facility(kvm->arch.model.fac_list, 138);
1937         /* we emulate STHYI in kvm */
1938         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1939         set_kvm_facility(kvm->arch.model.fac_list, 74);
1940         if (MACHINE_HAS_TLB_GUEST) {
1941                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1942                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1943         }
1944
1945         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1946         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1947
1948         kvm_s390_crypto_init(kvm);
1949
1950         mutex_init(&kvm->arch.float_int.ais_lock);
1951         kvm->arch.float_int.simm = 0;
1952         kvm->arch.float_int.nimm = 0;
1953         spin_lock_init(&kvm->arch.float_int.lock);
1954         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1955                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1956         init_waitqueue_head(&kvm->arch.ipte_wq);
1957         mutex_init(&kvm->arch.ipte_mutex);
1958
1959         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1960         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1961
1962         if (type & KVM_VM_S390_UCONTROL) {
1963                 kvm->arch.gmap = NULL;
1964                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1965         } else {
1966                 if (sclp.hamax == U64_MAX)
1967                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1968                 else
1969                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1970                                                     sclp.hamax + 1);
1971                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1972                 if (!kvm->arch.gmap)
1973                         goto out_err;
1974                 kvm->arch.gmap->private = kvm;
1975                 kvm->arch.gmap->pfault_enabled = 0;
1976         }
1977
1978         kvm->arch.css_support = 0;
1979         kvm->arch.use_irqchip = 0;
1980         kvm->arch.epoch = 0;
1981
1982         spin_lock_init(&kvm->arch.start_stop_lock);
1983         kvm_s390_vsie_init(kvm);
1984         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1985
1986         return 0;
1987 out_err:
1988         free_page((unsigned long)kvm->arch.sie_page2);
1989         debug_unregister(kvm->arch.dbf);
1990         sca_dispose(kvm);
1991         KVM_EVENT(3, "creation of vm failed: %d", rc);
1992         return rc;
1993 }
1994
1995 bool kvm_arch_has_vcpu_debugfs(void)
1996 {
1997         return false;
1998 }
1999
2000 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2001 {
2002         return 0;
2003 }
2004
2005 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2006 {
2007         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2008         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2009         kvm_s390_clear_local_irqs(vcpu);
2010         kvm_clear_async_pf_completion_queue(vcpu);
2011         if (!kvm_is_ucontrol(vcpu->kvm))
2012                 sca_del_vcpu(vcpu);
2013
2014         if (kvm_is_ucontrol(vcpu->kvm))
2015                 gmap_remove(vcpu->arch.gmap);
2016
2017         if (vcpu->kvm->arch.use_cmma)
2018                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2019         free_page((unsigned long)(vcpu->arch.sie_block));
2020
2021         kvm_vcpu_uninit(vcpu);
2022         kmem_cache_free(kvm_vcpu_cache, vcpu);
2023 }
2024
2025 static void kvm_free_vcpus(struct kvm *kvm)
2026 {
2027         unsigned int i;
2028         struct kvm_vcpu *vcpu;
2029
2030         kvm_for_each_vcpu(i, vcpu, kvm)
2031                 kvm_arch_vcpu_destroy(vcpu);
2032
2033         mutex_lock(&kvm->lock);
2034         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2035                 kvm->vcpus[i] = NULL;
2036
2037         atomic_set(&kvm->online_vcpus, 0);
2038         mutex_unlock(&kvm->lock);
2039 }
2040
2041 void kvm_arch_destroy_vm(struct kvm *kvm)
2042 {
2043         kvm_free_vcpus(kvm);
2044         sca_dispose(kvm);
2045         debug_unregister(kvm->arch.dbf);
2046         free_page((unsigned long)kvm->arch.sie_page2);
2047         if (!kvm_is_ucontrol(kvm))
2048                 gmap_remove(kvm->arch.gmap);
2049         kvm_s390_destroy_adapters(kvm);
2050         kvm_s390_clear_float_irqs(kvm);
2051         kvm_s390_vsie_destroy(kvm);
2052         if (kvm->arch.migration_state) {
2053                 vfree(kvm->arch.migration_state->pgste_bitmap);
2054                 kfree(kvm->arch.migration_state);
2055         }
2056         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2057 }
2058
2059 /* Section: vcpu related */
2060 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2061 {
2062         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2063         if (!vcpu->arch.gmap)
2064                 return -ENOMEM;
2065         vcpu->arch.gmap->private = vcpu->kvm;
2066
2067         return 0;
2068 }
2069
2070 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2071 {
2072         if (!kvm_s390_use_sca_entries())
2073                 return;
2074         read_lock(&vcpu->kvm->arch.sca_lock);
2075         if (vcpu->kvm->arch.use_esca) {
2076                 struct esca_block *sca = vcpu->kvm->arch.sca;
2077
2078                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2079                 sca->cpu[vcpu->vcpu_id].sda = 0;
2080         } else {
2081                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2082
2083                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2084                 sca->cpu[vcpu->vcpu_id].sda = 0;
2085         }
2086         read_unlock(&vcpu->kvm->arch.sca_lock);
2087 }
2088
2089 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2090 {
2091         if (!kvm_s390_use_sca_entries()) {
2092                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2093
2094                 /* we still need the basic sca for the ipte control */
2095                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2096                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2097         }
2098         read_lock(&vcpu->kvm->arch.sca_lock);
2099         if (vcpu->kvm->arch.use_esca) {
2100                 struct esca_block *sca = vcpu->kvm->arch.sca;
2101
2102                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2103                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2104                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2105                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2106                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2107         } else {
2108                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2109
2110                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2111                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2112                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2113                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2114         }
2115         read_unlock(&vcpu->kvm->arch.sca_lock);
2116 }
2117
2118 /* Basic SCA to Extended SCA data copy routines */
2119 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2120 {
2121         d->sda = s->sda;
2122         d->sigp_ctrl.c = s->sigp_ctrl.c;
2123         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2124 }
2125
2126 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2127 {
2128         int i;
2129
2130         d->ipte_control = s->ipte_control;
2131         d->mcn[0] = s->mcn;
2132         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2133                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2134 }
2135
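/*
 * Replace the basic SCA by an extended SCA. All VCPUs are blocked and the
 * SCA lock is taken for writing, so no CPU can run in SIE or look at the
 * SCA while the origins in the SIE control blocks are updated.
 */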
2136 static int sca_switch_to_extended(struct kvm *kvm)
2137 {
2138         struct bsca_block *old_sca = kvm->arch.sca;
2139         struct esca_block *new_sca;
2140         struct kvm_vcpu *vcpu;
2141         unsigned int vcpu_idx;
2142         u32 scaol, scaoh;
2143
2144         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2145         if (!new_sca)
2146                 return -ENOMEM;
2147
2148         scaoh = (u32)((u64)(new_sca) >> 32);
2149         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2150
2151         kvm_s390_vcpu_block_all(kvm);
2152         write_lock(&kvm->arch.sca_lock);
2153
2154         sca_copy_b_to_e(new_sca, old_sca);
2155
2156         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2157                 vcpu->arch.sie_block->scaoh = scaoh;
2158                 vcpu->arch.sie_block->scaol = scaol;
2159                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2160         }
2161         kvm->arch.sca = new_sca;
2162         kvm->arch.use_esca = 1;
2163
2164         write_unlock(&kvm->arch.sca_lock);
2165         kvm_s390_vcpu_unblock_all(kvm);
2166
2167         free_page((unsigned long)old_sca);
2168
2169         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2170                  old_sca, kvm->arch.sca);
2171         return 0;
2172 }
2173
2174 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2175 {
2176         int rc;
2177
2178         if (!kvm_s390_use_sca_entries()) {
2179                 if (id < KVM_MAX_VCPUS)
2180                         return true;
2181                 return false;
2182         }
2183         if (id < KVM_S390_BSCA_CPU_SLOTS)
2184                 return true;
2185         if (!sclp.has_esca || !sclp.has_64bscao)
2186                 return false;
2187
2188         mutex_lock(&kvm->lock);
2189         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2190         mutex_unlock(&kvm->lock);
2191
2192         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2193 }
2194
2195 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2196 {
2197         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2198         kvm_clear_async_pf_completion_queue(vcpu);
2199         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2200                                     KVM_SYNC_GPRS |
2201                                     KVM_SYNC_ACRS |
2202                                     KVM_SYNC_CRS |
2203                                     KVM_SYNC_ARCH0 |
2204                                     KVM_SYNC_PFAULT;
2205         kvm_s390_set_prefix(vcpu, 0);
2206         if (test_kvm_facility(vcpu->kvm, 64))
2207                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2208         if (test_kvm_facility(vcpu->kvm, 82))
2209                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2210         if (test_kvm_facility(vcpu->kvm, 133))
2211                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2212         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2213          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2214          */
2215         if (MACHINE_HAS_VX)
2216                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2217         else
2218                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2219
2220         if (kvm_is_ucontrol(vcpu->kvm))
2221                 return __kvm_ucontrol_vcpu_init(vcpu);
2222
2223         return 0;
2224 }
2225
2226 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2227 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2228 {
2229         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2230         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2231         vcpu->arch.cputm_start = get_tod_clock_fast();
2232         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2233 }
2234
2235 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2236 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2237 {
2238         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2239         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2240         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2241         vcpu->arch.cputm_start = 0;
2242         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2243 }
2244
2245 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2246 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2247 {
2248         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2249         vcpu->arch.cputm_enabled = true;
2250         __start_cpu_timer_accounting(vcpu);
2251 }
2252
2253 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2254 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2255 {
2256         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2257         __stop_cpu_timer_accounting(vcpu);
2258         vcpu->arch.cputm_enabled = false;
2259 }
2260
2261 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2262 {
2263         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2264         __enable_cpu_timer_accounting(vcpu);
2265         preempt_enable();
2266 }
2267
2268 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2269 {
2270         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2271         __disable_cpu_timer_accounting(vcpu);
2272         preempt_enable();
2273 }
2274
2275 /* set the cpu timer - may only be called from the VCPU thread itself */
2276 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2277 {
2278         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2279         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2280         if (vcpu->arch.cputm_enabled)
2281                 vcpu->arch.cputm_start = get_tod_clock_fast();
2282         vcpu->arch.sie_block->cputm = cputm;
2283         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2284         preempt_enable();
2285 }
2286
2287 /* update and get the cpu timer - can also be called from other VCPU threads */
2288 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2289 {
2290         unsigned int seq;
2291         __u64 value;
2292
2293         if (unlikely(!vcpu->arch.cputm_enabled))
2294                 return vcpu->arch.sie_block->cputm;
2295
2296         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2297         do {
2298                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2299                 /*
2300                  * If the writer would ever execute a read in the critical
2301                  * section, e.g. in irq context, we have a deadlock.
2302                  */
2303                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2304                 value = vcpu->arch.sie_block->cputm;
2305                 /* if cputm_start is 0, accounting is being started/stopped */
2306                 if (likely(vcpu->arch.cputm_start))
2307                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2308         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2309         preempt_enable();
2310         return value;
2311 }
2312
2313 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2314 {
2315
2316         gmap_enable(vcpu->arch.enabled_gmap);
2317         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2318         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2319                 __start_cpu_timer_accounting(vcpu);
2320         vcpu->cpu = cpu;
2321 }
2322
2323 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2324 {
2325         vcpu->cpu = -1;
2326         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2327                 __stop_cpu_timer_accounting(vcpu);
2328         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2329         vcpu->arch.enabled_gmap = gmap_get_enabled();
2330         gmap_disable(vcpu->arch.enabled_gmap);
2331
2332 }
2333
2334 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2335 {
2336         /* this equals initial cpu reset in the PoP, but we don't switch to ESA */
2337         vcpu->arch.sie_block->gpsw.mask = 0UL;
2338         vcpu->arch.sie_block->gpsw.addr = 0UL;
2339         kvm_s390_set_prefix(vcpu, 0);
2340         kvm_s390_set_cpu_timer(vcpu, 0);
2341         vcpu->arch.sie_block->ckc       = 0UL;
2342         vcpu->arch.sie_block->todpr     = 0;
2343         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2344         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2345         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2346         /* make sure the new fpc will be lazily loaded */
2347         save_fpu_regs();
2348         current->thread.fpu.fpc = 0;
2349         vcpu->arch.sie_block->gbea = 1;
2350         vcpu->arch.sie_block->pp = 0;
2351         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2352         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2353         kvm_clear_async_pf_completion_queue(vcpu);
2354         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2355                 kvm_s390_vcpu_stop(vcpu);
2356         kvm_s390_clear_local_irqs(vcpu);
2357 }
2358
2359 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2360 {
2361         mutex_lock(&vcpu->kvm->lock);
2362         preempt_disable();
2363         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2364         preempt_enable();
2365         mutex_unlock(&vcpu->kvm->lock);
2366         if (!kvm_is_ucontrol(vcpu->kvm)) {
2367                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2368                 sca_add_vcpu(vcpu);
2369         }
2370         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2371                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2372         /* make vcpu_load load the right gmap on the first trigger */
2373         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2374 }
2375
2376 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2377 {
2378         if (!test_kvm_facility(vcpu->kvm, 76))
2379                 return;
2380
2381         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2382
2383         if (vcpu->kvm->arch.crypto.aes_kw)
2384                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2385         if (vcpu->kvm->arch.crypto.dea_kw)
2386                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2387
2388         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2389 }
2390
2391 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2392 {
2393         free_page(vcpu->arch.sie_block->cbrlo);
2394         vcpu->arch.sie_block->cbrlo = 0;
2395 }
2396
2397 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2398 {
2399         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2400         if (!vcpu->arch.sie_block->cbrlo)
2401                 return -ENOMEM;
2402
2403         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2404         return 0;
2405 }
2406
2407 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2408 {
2409         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2410
2411         vcpu->arch.sie_block->ibc = model->ibc;
2412         if (test_kvm_facility(vcpu->kvm, 7))
2413                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2414 }
2415
2416 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2417 {
2418         int rc = 0;
2419
2420         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2421                                                     CPUSTAT_SM |
2422                                                     CPUSTAT_STOPPED);
2423
2424         if (test_kvm_facility(vcpu->kvm, 78))
2425                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2426         else if (test_kvm_facility(vcpu->kvm, 8))
2427                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2428
2429         kvm_s390_vcpu_setup_model(vcpu);
2430
2431         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2432         if (MACHINE_HAS_ESOP)
2433                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2434         if (test_kvm_facility(vcpu->kvm, 9))
2435                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2436         if (test_kvm_facility(vcpu->kvm, 73))
2437                 vcpu->arch.sie_block->ecb |= ECB_TE;
2438
2439         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2440                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2441         if (test_kvm_facility(vcpu->kvm, 130))
2442                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2443         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2444         if (sclp.has_cei)
2445                 vcpu->arch.sie_block->eca |= ECA_CEI;
2446         if (sclp.has_ib)
2447                 vcpu->arch.sie_block->eca |= ECA_IB;
2448         if (sclp.has_siif)
2449                 vcpu->arch.sie_block->eca |= ECA_SII;
2450         if (sclp.has_sigpif)
2451                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2452         if (test_kvm_facility(vcpu->kvm, 129)) {
2453                 vcpu->arch.sie_block->eca |= ECA_VX;
2454                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2455         }
2456         if (test_kvm_facility(vcpu->kvm, 139))
2457                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2458
2459         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2460                                         | SDNXC;
2461         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2462
2463         if (sclp.has_kss)
2464                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2465         else
2466                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2467
2468         if (vcpu->kvm->arch.use_cmma) {
2469                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2470                 if (rc)
2471                         return rc;
2472         }
2473         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2474         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2475
2476         kvm_s390_vcpu_crypto_setup(vcpu);
2477
2478         return rc;
2479 }
2480
2481 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2482                                       unsigned int id)
2483 {
2484         struct kvm_vcpu *vcpu;
2485         struct sie_page *sie_page;
2486         int rc = -EINVAL;
2487
2488         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2489                 goto out;
2490
2491         rc = -ENOMEM;
2492
2493         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2494         if (!vcpu)
2495                 goto out;
2496
2497         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2498         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2499         if (!sie_page)
2500                 goto out_free_cpu;
2501
2502         vcpu->arch.sie_block = &sie_page->sie_block;
2503         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2504
2505         /* the real guest size will always be smaller than msl */
2506         vcpu->arch.sie_block->mso = 0;
2507         vcpu->arch.sie_block->msl = sclp.hamax;
2508
2509         vcpu->arch.sie_block->icpua = id;
2510         spin_lock_init(&vcpu->arch.local_int.lock);
2511         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2512         vcpu->arch.local_int.wq = &vcpu->wq;
2513         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2514         seqcount_init(&vcpu->arch.cputm_seqcount);
2515
2516         rc = kvm_vcpu_init(vcpu, kvm, id);
2517         if (rc)
2518                 goto out_free_sie_block;
2519         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2520                  vcpu->arch.sie_block);
2521         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2522
2523         return vcpu;
2524 out_free_sie_block:
2525         free_page((unsigned long)(vcpu->arch.sie_block));
2526 out_free_cpu:
2527         kmem_cache_free(kvm_vcpu_cache, vcpu);
2528 out:
2529         return ERR_PTR(rc);
2530 }
2531
2532 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2533 {
2534         return kvm_s390_vcpu_has_irq(vcpu, 0);
2535 }
2536
2537 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2538 {
2539         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2540 }
2541
2542 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2543 {
2544         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2545         exit_sie(vcpu);
2546 }
2547
2548 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2549 {
2550         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2551 }
2552
2553 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2554 {
2555         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2556         exit_sie(vcpu);
2557 }
2558
2559 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2560 {
2561         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2562 }
2563
2564 /*
2565  * Kick a guest cpu out of SIE and wait until SIE is not running. If the
2566  * CPU is not running (e.g. waiting as idle) the function returns immediately.
2567  */
2568 void exit_sie(struct kvm_vcpu *vcpu)
2569 {
2570         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2571         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2572                 cpu_relax();
2573 }
2574
2575 /* Kick a guest cpu out of SIE to process a request synchronously */
2576 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2577 {
2578         kvm_make_request(req, vcpu);
2579         kvm_s390_vcpu_request(vcpu);
2580 }
2581
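/*
 * gmap notifier callback: if the invalidated range may contain a VCPU's
 * prefix area (two pages, hence the 2*PAGE_SIZE below), make that VCPU
 * re-arm the ipte notifier via KVM_REQ_MMU_RELOAD.
 */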
2582 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2583                               unsigned long end)
2584 {
2585         struct kvm *kvm = gmap->private;
2586         struct kvm_vcpu *vcpu;
2587         unsigned long prefix;
2588         int i;
2589
2590         if (gmap_is_shadow(gmap))
2591                 return;
2592         if (start >= 1UL << 31)
2593                 /* We are only interested in prefix pages */
2594                 return;
2595         kvm_for_each_vcpu(i, vcpu, kvm) {
2596                 /* match against both prefix pages */
2597                 prefix = kvm_s390_get_prefix(vcpu);
2598                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2599                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2600                                    start, end);
2601                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2602                 }
2603         }
2604 }
2605
2606 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2607 {
2608         /* kvm common code refers to this, but never calls it */
2609         BUG();
2610         return 0;
2611 }
2612
2613 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2614                                            struct kvm_one_reg *reg)
2615 {
2616         int r = -EINVAL;
2617
2618         switch (reg->id) {
2619         case KVM_REG_S390_TODPR:
2620                 r = put_user(vcpu->arch.sie_block->todpr,
2621                              (u32 __user *)reg->addr);
2622                 break;
2623         case KVM_REG_S390_EPOCHDIFF:
2624                 r = put_user(vcpu->arch.sie_block->epoch,
2625                              (u64 __user *)reg->addr);
2626                 break;
2627         case KVM_REG_S390_CPU_TIMER:
2628                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2629                              (u64 __user *)reg->addr);
2630                 break;
2631         case KVM_REG_S390_CLOCK_COMP:
2632                 r = put_user(vcpu->arch.sie_block->ckc,
2633                              (u64 __user *)reg->addr);
2634                 break;
2635         case KVM_REG_S390_PFTOKEN:
2636                 r = put_user(vcpu->arch.pfault_token,
2637                              (u64 __user *)reg->addr);
2638                 break;
2639         case KVM_REG_S390_PFCOMPARE:
2640                 r = put_user(vcpu->arch.pfault_compare,
2641                              (u64 __user *)reg->addr);
2642                 break;
2643         case KVM_REG_S390_PFSELECT:
2644                 r = put_user(vcpu->arch.pfault_select,
2645                              (u64 __user *)reg->addr);
2646                 break;
2647         case KVM_REG_S390_PP:
2648                 r = put_user(vcpu->arch.sie_block->pp,
2649                              (u64 __user *)reg->addr);
2650                 break;
2651         case KVM_REG_S390_GBEA:
2652                 r = put_user(vcpu->arch.sie_block->gbea,
2653                              (u64 __user *)reg->addr);
2654                 break;
2655         default:
2656                 break;
2657         }
2658
2659         return r;
2660 }
2661
2662 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2663                                            struct kvm_one_reg *reg)
2664 {
2665         int r = -EINVAL;
2666         __u64 val;
2667
2668         switch (reg->id) {
2669         case KVM_REG_S390_TODPR:
2670                 r = get_user(vcpu->arch.sie_block->todpr,
2671                              (u32 __user *)reg->addr);
2672                 break;
2673         case KVM_REG_S390_EPOCHDIFF:
2674                 r = get_user(vcpu->arch.sie_block->epoch,
2675                              (u64 __user *)reg->addr);
2676                 break;
2677         case KVM_REG_S390_CPU_TIMER:
2678                 r = get_user(val, (u64 __user *)reg->addr);
2679                 if (!r)
2680                         kvm_s390_set_cpu_timer(vcpu, val);
2681                 break;
2682         case KVM_REG_S390_CLOCK_COMP:
2683                 r = get_user(vcpu->arch.sie_block->ckc,
2684                              (u64 __user *)reg->addr);
2685                 break;
2686         case KVM_REG_S390_PFTOKEN:
2687                 r = get_user(vcpu->arch.pfault_token,
2688                              (u64 __user *)reg->addr);
2689                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2690                         kvm_clear_async_pf_completion_queue(vcpu);
2691                 break;
2692         case KVM_REG_S390_PFCOMPARE:
2693                 r = get_user(vcpu->arch.pfault_compare,
2694                              (u64 __user *)reg->addr);
2695                 break;
2696         case KVM_REG_S390_PFSELECT:
2697                 r = get_user(vcpu->arch.pfault_select,
2698                              (u64 __user *)reg->addr);
2699                 break;
2700         case KVM_REG_S390_PP:
2701                 r = get_user(vcpu->arch.sie_block->pp,
2702                              (u64 __user *)reg->addr);
2703                 break;
2704         case KVM_REG_S390_GBEA:
2705                 r = get_user(vcpu->arch.sie_block->gbea,
2706                              (u64 __user *)reg->addr);
2707                 break;
2708         default:
2709                 break;
2710         }
2711
2712         return r;
2713 }
2714
2715 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2716 {
2717         kvm_s390_vcpu_initial_reset(vcpu);
2718         return 0;
2719 }
2720
2721 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2722 {
2723         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2724         return 0;
2725 }
2726
2727 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2728 {
2729         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2730         return 0;
2731 }
2732
2733 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2734                                   struct kvm_sregs *sregs)
2735 {
2736         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2737         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2738         return 0;
2739 }
2740
2741 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2742                                   struct kvm_sregs *sregs)
2743 {
2744         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2745         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2746         return 0;
2747 }
2748
2749 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2750 {
2751         if (test_fp_ctl(fpu->fpc))
2752                 return -EINVAL;
2753         vcpu->run->s.regs.fpc = fpu->fpc;
2754         if (MACHINE_HAS_VX)
2755                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2756                                  (freg_t *) fpu->fprs);
2757         else
2758                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2759         return 0;
2760 }
2761
2762 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2763 {
2764         /* make sure we have the latest values */
2765         save_fpu_regs();
2766         if (MACHINE_HAS_VX)
2767                 convert_vx_to_fp((freg_t *) fpu->fprs,
2768                                  (__vector128 *) vcpu->run->s.regs.vrs);
2769         else
2770                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2771         fpu->fpc = vcpu->run->s.regs.fpc;
2772         return 0;
2773 }
2774
2775 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2776 {
2777         int rc = 0;
2778
2779         if (!is_vcpu_stopped(vcpu))
2780                 rc = -EBUSY;
2781         else {
2782                 vcpu->run->psw_mask = psw.mask;
2783                 vcpu->run->psw_addr = psw.addr;
2784         }
2785         return rc;
2786 }
2787
2788 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2789                                   struct kvm_translation *tr)
2790 {
2791         return -EINVAL; /* not implemented yet */
2792 }
2793
2794 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2795                               KVM_GUESTDBG_USE_HW_BP | \
2796                               KVM_GUESTDBG_ENABLE)
2797
2798 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2799                                         struct kvm_guest_debug *dbg)
2800 {
2801         int rc = 0;
2802
2803         vcpu->guest_debug = 0;
2804         kvm_s390_clear_bp_data(vcpu);
2805
2806         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2807                 return -EINVAL;
2808         if (!sclp.has_gpere)
2809                 return -EINVAL;
2810
2811         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2812                 vcpu->guest_debug = dbg->control;
2813                 /* enforce guest PER */
2814                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2815
2816                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2817                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2818         } else {
2819                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2820                 vcpu->arch.guestdbg.last_bp = 0;
2821         }
2822
2823         if (rc) {
2824                 vcpu->guest_debug = 0;
2825                 kvm_s390_clear_bp_data(vcpu);
2826                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2827         }
2828
2829         return rc;
2830 }
2831
2832 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2833                                     struct kvm_mp_state *mp_state)
2834 {
2835         /* CHECK_STOP and LOAD are not supported yet */
2836         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2837                                        KVM_MP_STATE_OPERATING;
2838 }
2839
2840 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2841                                     struct kvm_mp_state *mp_state)
2842 {
2843         int rc = 0;
2844
2845         /* user space knows about this interface - let it control the state */
2846         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2847
2848         switch (mp_state->mp_state) {
2849         case KVM_MP_STATE_STOPPED:
2850                 kvm_s390_vcpu_stop(vcpu);
2851                 break;
2852         case KVM_MP_STATE_OPERATING:
2853                 kvm_s390_vcpu_start(vcpu);
2854                 break;
2855         case KVM_MP_STATE_LOAD:
2856         case KVM_MP_STATE_CHECK_STOP:
2857                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2858         default:
2859                 rc = -ENXIO;
2860         }
2861
2862         return rc;
2863 }
2864
2865 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2866 {
2867         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2868 }
2869
2870 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2871 {
2872 retry:
2873         kvm_s390_vcpu_request_handled(vcpu);
2874         if (!kvm_request_pending(vcpu))
2875                 return 0;
2876         /*
2877          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2878          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2879          * This ensures that the ipte instruction for this request has
2880          * already finished. We might race against a second unmapper that
2881          * wants to set the blocking bit. Let's just retry the request loop.
2882          */
2883         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2884                 int rc;
2885                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2886                                           kvm_s390_get_prefix(vcpu),
2887                                           PAGE_SIZE * 2, PROT_WRITE);
2888                 if (rc) {
2889                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2890                         return rc;
2891                 }
2892                 goto retry;
2893         }
2894
2895         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2896                 vcpu->arch.sie_block->ihcpu = 0xffff;
2897                 goto retry;
2898         }
2899
2900         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2901                 if (!ibs_enabled(vcpu)) {
2902                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2903                         atomic_or(CPUSTAT_IBS,
2904                                         &vcpu->arch.sie_block->cpuflags);
2905                 }
2906                 goto retry;
2907         }
2908
2909         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2910                 if (ibs_enabled(vcpu)) {
2911                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2912                         atomic_andnot(CPUSTAT_IBS,
2913                                           &vcpu->arch.sie_block->cpuflags);
2914                 }
2915                 goto retry;
2916         }
2917
2918         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2919                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2920                 goto retry;
2921         }
2922
2923         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2924                 /*
2925                  * Disable CMMA virtualization; we will emulate the ESSA
2926                  * instruction manually, in order to provide the additional
2927                  * functionality needed for live migration.
2928                  */
2929                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2930                 goto retry;
2931         }
2932
2933         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2934                 /*
2935                  * Re-enable CMMA virtualization if CMMA is available and
2936                  * was used.
2937                  */
2938                 if ((vcpu->kvm->arch.use_cmma) &&
2939                     (vcpu->kvm->mm->context.use_cmma))
2940                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2941                 goto retry;
2942         }
2943
2944         /* nothing to do, just clear the request */
2945         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2946
2947         return 0;
2948 }
2949
2950 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2951                                  const struct kvm_s390_vm_tod_clock *gtod)
2952 {
2953         struct kvm_vcpu *vcpu;
2954         struct kvm_s390_tod_clock_ext htod;
2955         int i;
2956
2957         mutex_lock(&kvm->lock);
2958         preempt_disable();
2959
2960         get_tod_clock_ext((char *)&htod);
2961
2962         kvm->arch.epoch = gtod->tod - htod.tod;
2963         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2964
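        /*
         * The subtractions above are modulo 2^64; if the TOD difference
         * wrapped (gtod->tod < htod.tod), carry the borrow into the epoch
         * index.
         */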
2965         if (kvm->arch.epoch > gtod->tod)
2966                 kvm->arch.epdx -= 1;
2967
2968         kvm_s390_vcpu_block_all(kvm);
2969         kvm_for_each_vcpu(i, vcpu, kvm) {
2970                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2971                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2972         }
2973
2974         kvm_s390_vcpu_unblock_all(kvm);
2975         preempt_enable();
2976         mutex_unlock(&kvm->lock);
2977 }
2978
2979 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2980 {
2981         struct kvm_vcpu *vcpu;
2982         int i;
2983
2984         mutex_lock(&kvm->lock);
2985         preempt_disable();
2986         kvm->arch.epoch = tod - get_tod_clock();
2987         kvm_s390_vcpu_block_all(kvm);
2988         kvm_for_each_vcpu(i, vcpu, kvm)
2989                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2990         kvm_s390_vcpu_unblock_all(kvm);
2991         preempt_enable();
2992         mutex_unlock(&kvm->lock);
2993 }
2994
2995 /**
2996  * kvm_arch_fault_in_page - fault-in guest page if necessary
2997  * @vcpu: The corresponding virtual cpu
2998  * @gpa: Guest physical address
2999  * @writable: Whether the page should be writable or not
3000  *
3001  * Make sure that a guest page has been faulted-in on the host.
3002  *
3003  * Return: Zero on success, negative error code otherwise.
3004  */
3005 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3006 {
3007         return gmap_fault(vcpu->arch.gmap, gpa,
3008                           writable ? FAULT_FLAG_WRITE : 0);
3009 }
3010
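/*
 * A pfault INIT notification is made pending on the VCPU that triggered
 * it (kvm_s390_inject_vcpu), whereas the DONE notification is injected as
 * a floating interrupt (kvm_s390_inject_vm), so any guest CPU may take it.
 */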
3011 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3012                                       unsigned long token)
3013 {
3014         struct kvm_s390_interrupt inti;
3015         struct kvm_s390_irq irq;
3016
3017         if (start_token) {
3018                 irq.u.ext.ext_params2 = token;
3019                 irq.type = KVM_S390_INT_PFAULT_INIT;
3020                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3021         } else {
3022                 inti.type = KVM_S390_INT_PFAULT_DONE;
3023                 inti.parm64 = token;
3024                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3025         }
3026 }
3027
3028 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3029                                      struct kvm_async_pf *work)
3030 {
3031         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3032         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3033 }
3034
3035 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3036                                  struct kvm_async_pf *work)
3037 {
3038         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3039         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3040 }
3041
3042 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3043                                struct kvm_async_pf *work)
3044 {
3045         /* s390 will always inject the page directly */
3046 }
3047
3048 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3049 {
3050         /*
3051          * s390 will always inject the page directly,
3052          * but we still want check_async_completion to clean up
3053          */
3054         return true;
3055 }
3056
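/*
 * Only arm an async pfault when the guest has opted in: a valid pfault
 * token must be registered, the current PSW mask must match the pfault
 * compare/select values, external interrupts must be enabled with no
 * interrupt already pending, the corresponding subclass mask bit in CR0
 * (0x200) must be set and pfault handling must be enabled for this gmap.
 * Otherwise the caller falls back to a synchronous fault-in.
 */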
3057 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3058 {
3059         hva_t hva;
3060         struct kvm_arch_async_pf arch;
3061         int rc;
3062
3063         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3064                 return 0;
3065         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3066             vcpu->arch.pfault_compare)
3067                 return 0;
3068         if (psw_extint_disabled(vcpu))
3069                 return 0;
3070         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3071                 return 0;
3072         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3073                 return 0;
3074         if (!vcpu->arch.gmap->pfault_enabled)
3075                 return 0;
3076
3077         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3078         hva += current->thread.gmap_addr & ~PAGE_MASK;
3079         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3080                 return 0;
3081
3082         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3083         return rc;
3084 }
3085
3086 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3087 {
3088         int rc, cpuflags;
3089
3090         /*
3091          * On s390, notifications for arriving pages will be delivered directly
3092          * to the guest, but the housekeeping for completed pfaults is
3093          * handled outside the worker.
3094          */
3095         kvm_check_async_pf_completion(vcpu);
3096
3097         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3098         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3099
3100         if (need_resched())
3101                 schedule();
3102
3103         if (test_cpu_flag(CIF_MCCK_PENDING))
3104                 s390_handle_mcck();
3105
3106         if (!kvm_is_ucontrol(vcpu->kvm)) {
3107                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3108                 if (rc)
3109                         return rc;
3110         }
3111
3112         rc = kvm_s390_handle_requests(vcpu);
3113         if (rc)
3114                 return rc;
3115
3116         if (guestdbg_enabled(vcpu)) {
3117                 kvm_s390_backup_guest_per_regs(vcpu);
3118                 kvm_s390_patch_guest_per_regs(vcpu);
3119         }
3120
3121         vcpu->arch.sie_block->icptcode = 0;
3122         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3123         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3124         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3125
3126         return 0;
3127 }
3128
3129 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3130 {
3131         struct kvm_s390_pgm_info pgm_info = {
3132                 .code = PGM_ADDRESSING,
3133         };
3134         u8 opcode, ilen;
3135         int rc;
3136
3137         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3138         trace_kvm_s390_sie_fault(vcpu);
3139
3140         /*
3141          * We want to inject an addressing exception, which is defined as a
3142          * suppressing or terminating exception. However, since we came here
3143          * by a DAT access exception, the PSW still points to the faulting
3144          * instruction since DAT exceptions are nullifying. So we've got
3145          * to look up the current opcode to get the length of the instruction
3146          * to be able to forward the PSW.
3147          */
3148         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3149         ilen = insn_length(opcode);
3150         if (rc < 0) {
3151                 return rc;
3152         } else if (rc) {
3153                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3154                  * Forward by arbitrary ilc, injection will take care of
3155                  * nullification if necessary.
3156                  */
3157                 pgm_info = vcpu->arch.pgm;
3158                 ilen = 4;
3159         }
3160         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3161         kvm_s390_forward_psw(vcpu, ilen);
3162         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3163 }
3164
3165 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3166 {
3167         struct mcck_volatile_info *mcck_info;
3168         struct sie_page *sie_page;
3169
3170         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3171                    vcpu->arch.sie_block->icptcode);
3172         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3173
3174         if (guestdbg_enabled(vcpu))
3175                 kvm_s390_restore_guest_per_regs(vcpu);
3176
3177         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3178         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3179
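        /*
         * An exit_reason of -EINTR indicates a host machine check that hit
         * while the guest was running in SIE; the volatile machine check
         * data was stashed in the sie_page and is reinjected into the
         * guest here.
         */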
3180         if (exit_reason == -EINTR) {
3181                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3182                 sie_page = container_of(vcpu->arch.sie_block,
3183                                         struct sie_page, sie_block);
3184                 mcck_info = &sie_page->mcck_info;
3185                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3186                 return 0;
3187         }
3188
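        /*
         * A non-zero intercept code means SIE stopped on an interception.
         * Intercepts that the kernel cannot handle (-EOPNOTSUPP) are
         * forwarded to userspace via KVM_EXIT_S390_SIEIC.
         */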
3189         if (vcpu->arch.sie_block->icptcode > 0) {
3190                 int rc = kvm_handle_sie_intercept(vcpu);
3191
3192                 if (rc != -EOPNOTSUPP)
3193                         return rc;
3194                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3195                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3196                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3197                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3198                 return -EREMOTE;
3199         } else if (exit_reason != -EFAULT) {
3200                 vcpu->stat.exit_null++;
3201                 return 0;
3202         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3203                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3204                 vcpu->run->s390_ucontrol.trans_exc_code =
3205                                                 current->thread.gmap_addr;
3206                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3207                 return -EREMOTE;
3208         } else if (current->thread.gmap_pfault) {
3209                 trace_kvm_s390_major_guest_pfault(vcpu);
3210                 current->thread.gmap_pfault = 0;
3211                 if (kvm_arch_setup_async_pf(vcpu))
3212                         return 0;
3213                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3214         }
3215         return vcpu_post_run_fault_in_sie(vcpu);
3216 }
3217
3218 static int __vcpu_run(struct kvm_vcpu *vcpu)
3219 {
3220         int rc, exit_reason;
3221
3222         /*
3223          * We try to hold kvm->srcu during most of vcpu_run (except when
3224          * running the guest), so that memslots (and other stuff) are protected
3225          */
3226         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3227
3228         do {
3229                 rc = vcpu_pre_run(vcpu);
3230                 if (rc)
3231                         break;
3232
3233                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3234                 /*
3235                  * As PF_VCPU will be used in the fault handler, there should be
3236                  * no uaccess between guest_enter and guest_exit.
3237                  */
3238                 local_irq_disable();
3239                 guest_enter_irqoff();
3240                 __disable_cpu_timer_accounting(vcpu);
3241                 local_irq_enable();
3242                 exit_reason = sie64a(vcpu->arch.sie_block,
3243                                      vcpu->run->s.regs.gprs);
3244                 local_irq_disable();
3245                 __enable_cpu_timer_accounting(vcpu);
3246                 guest_exit_irqoff();
3247                 local_irq_enable();
3248                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3249
3250                 rc = vcpu_post_run(vcpu, exit_reason);
3251         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3252
3253         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3254         return rc;
3255 }
3256
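/*
 * Copy the register state that userspace may have modified (as flagged in
 * kvm_run->kvm_dirty_regs) from the kvm_run area into the SIE block and
 * vcpu state, and switch the host's floating point/vector, access and
 * guarded storage registers for the guest's before entering SIE.
 */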
3257 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3258 {
3259         struct runtime_instr_cb *riccb;
3260         struct gs_cb *gscb;
3261
3262         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3263         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3264         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3265         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3266         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3267                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3268         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3269                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3270                 /* some control register changes require a tlb flush */
3271                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3272         }
3273         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3274                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3275                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3276                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3277                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3278                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3279         }
3280         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3281                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3282                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3283                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3284                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3285                         kvm_clear_async_pf_completion_queue(vcpu);
3286         }
3287         /*
3288          * If userspace sets the riccb (e.g. after migration) to a valid state,
3289          * we should enable RI here instead of doing the lazy enablement.
3290          */
3291         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3292             test_kvm_facility(vcpu->kvm, 64) &&
3293             riccb->v &&
3294             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3295                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3296                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3297         }
3298         /*
3299          * If userspace sets the gscb (e.g. after migration) to non-zero,
3300          * we should enable GS here instead of doing the lazy enablement.
3301          */
3302         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3303             test_kvm_facility(vcpu->kvm, 133) &&
3304             gscb->gssm &&
3305             !vcpu->arch.gs_enabled) {
3306                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3307                 vcpu->arch.sie_block->ecb |= ECB_GS;
3308                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3309                 vcpu->arch.gs_enabled = 1;
3310         }
3311         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3312             test_kvm_facility(vcpu->kvm, 82)) {
3313                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3314                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3315         }
3316         save_access_regs(vcpu->arch.host_acrs);
3317         restore_access_regs(vcpu->run->s.regs.acrs);
3318         /* save host (userspace) fprs/vrs */
3319         save_fpu_regs();
3320         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3321         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3322         if (MACHINE_HAS_VX)
3323                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3324         else
3325                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3326         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3327         if (test_fp_ctl(current->thread.fpu.fpc))
3328                 /* User space provided an invalid FPC, let's clear it */
3329                 current->thread.fpu.fpc = 0;
3330         if (MACHINE_HAS_GS) {
3331                 preempt_disable();
3332                 __ctl_set_bit(2, 4);
3333                 if (current->thread.gs_cb) {
3334                         vcpu->arch.host_gscb = current->thread.gs_cb;
3335                         save_gs_cb(vcpu->arch.host_gscb);
3336                 }
3337                 if (vcpu->arch.gs_enabled) {
3338                         current->thread.gs_cb = (struct gs_cb *)
3339                                                 &vcpu->run->s.regs.gscb;
3340                         restore_gs_cb(current->thread.gs_cb);
3341                 }
3342                 preempt_enable();
3343         }
3344
3345         kvm_run->kvm_dirty_regs = 0;
3346 }
3347
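/*
 * Counterpart to sync_regs(): copy the guest register state back into the
 * kvm_run area for userspace and restore the host's register context.
 */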
3348 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3349 {
3350         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3351         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3352         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3353         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3354         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3355         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3356         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3357         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3358         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3359         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3360         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3361         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3362         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3363         save_access_regs(vcpu->run->s.regs.acrs);
3364         restore_access_regs(vcpu->arch.host_acrs);
3365         /* Save guest register state */
3366         save_fpu_regs();
3367         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3368         /* Restore will be done lazily at return */
3369         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3370         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3371         if (MACHINE_HAS_GS) {
3372                 __ctl_set_bit(2, 4);
3373                 if (vcpu->arch.gs_enabled)
3374                         save_gs_cb(current->thread.gs_cb);
3375                 preempt_disable();
3376                 current->thread.gs_cb = vcpu->arch.host_gscb;
3377                 restore_gs_cb(vcpu->arch.host_gscb);
3378                 preempt_enable();
3379                 if (!vcpu->arch.host_gscb)
3380                         __ctl_clear_bit(2, 4);
3381                 vcpu->arch.host_gscb = NULL;
3382         }
3383
3384 }
3385
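/*
 * Entry point for the KVM_RUN ioctl. For illustration only, a minimal
 * userspace run loop (hypothetical, error handling omitted) looks roughly
 * like this, with mmap_size obtained via KVM_GET_VCPU_MMAP_SIZE:
 *
 *        struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *                                   MAP_SHARED, vcpu_fd, 0);
 *        for (;;) {
 *                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *                        break;
 *                if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *                        handle_sie_intercept(run);
 *        }
 *
 * handle_sie_intercept() stands in for whatever policy userspace applies.
 */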
3386 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3387 {
3388         int rc;
3389
3390         if (kvm_run->immediate_exit)
3391                 return -EINTR;
3392
3393         if (guestdbg_exit_pending(vcpu)) {
3394                 kvm_s390_prepare_debug_exit(vcpu);
3395                 return 0;
3396         }
3397
3398         kvm_sigset_activate(vcpu);
3399
3400         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3401                 kvm_s390_vcpu_start(vcpu);
3402         } else if (is_vcpu_stopped(vcpu)) {
3403                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3404                                    vcpu->vcpu_id);
3405                 return -EINVAL;
3406         }
3407
3408         sync_regs(vcpu, kvm_run);
3409         enable_cpu_timer_accounting(vcpu);
3410
3411         might_fault();
3412         rc = __vcpu_run(vcpu);
3413
3414         if (signal_pending(current) && !rc) {
3415                 kvm_run->exit_reason = KVM_EXIT_INTR;
3416                 rc = -EINTR;
3417         }
3418
3419         if (guestdbg_exit_pending(vcpu) && !rc)  {
3420                 kvm_s390_prepare_debug_exit(vcpu);
3421                 rc = 0;
3422         }
3423
3424         if (rc == -EREMOTE) {
3425                 /* userspace support is needed, kvm_run has been prepared */
3426                 rc = 0;
3427         }
3428
3429         disable_cpu_timer_accounting(vcpu);
3430         store_regs(vcpu, kvm_run);
3431
3432         kvm_sigset_deactivate(vcpu);
3433
3434         vcpu->stat.exit_userspace++;
3435         return rc;
3436 }
3437
3438 /*
3439  * store status at address
3440  * we have two special cases:
3441  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3442  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3443  */
3444 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3445 {
3446         unsigned char archmode = 1;
3447         freg_t fprs[NUM_FPRS];
3448         unsigned int px;
3449         u64 clkcomp, cputm;
3450         int rc;
3451
3452         px = kvm_s390_get_prefix(vcpu);
3453         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3454                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3455                         return -EFAULT;
3456                 gpa = 0;
3457         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3458                 if (write_guest_real(vcpu, 163, &archmode, 1))
3459                         return -EFAULT;
3460                 gpa = px;
3461         } else
3462                 gpa -= __LC_FPREGS_SAVE_AREA;
3463
3464         /* manually convert vector registers if necessary */
3465         if (MACHINE_HAS_VX) {
3466                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3467                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3468                                      fprs, 128);
3469         } else {
3470                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3471                                      vcpu->run->s.regs.fprs, 128);
3472         }
3473         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3474                               vcpu->run->s.regs.gprs, 128);
3475         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3476                               &vcpu->arch.sie_block->gpsw, 16);
3477         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3478                               &px, 4);
3479         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3480                               &vcpu->run->s.regs.fpc, 4);
3481         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3482                               &vcpu->arch.sie_block->todpr, 4);
3483         cputm = kvm_s390_get_cpu_timer(vcpu);
3484         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3485                               &cputm, 8);
3486         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3487         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3488                               &clkcomp, 8);
3489         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3490                               &vcpu->run->s.regs.acrs, 64);
3491         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3492                               &vcpu->arch.sie_block->gcr, 128);
3493         return rc ? -EFAULT : 0;
3494 }
3495
3496 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3497 {
3498         /*
3499          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3500          * switch in the run ioctl. Let's update our copies before we save
3501          * it into the save area
3502          * them into the save area
3503         save_fpu_regs();
3504         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3505         save_access_regs(vcpu->run->s.regs.acrs);
3506
3507         return kvm_s390_store_status_unloaded(vcpu, addr);
3508 }
3509
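/*
 * When toggling IBS, first consume any not-yet-handled request for the
 * opposite state with kvm_check_request(), then post the new request;
 * kvm_s390_sync_request() also forces the VCPU out of SIE so the request
 * is handled before the next guest run.
 */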
3510 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3511 {
3512         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3513         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3514 }
3515
3516 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3517 {
3518         unsigned int i;
3519         struct kvm_vcpu *vcpu;
3520
3521         kvm_for_each_vcpu(i, vcpu, kvm) {
3522                 __disable_ibs_on_vcpu(vcpu);
3523         }
3524 }
3525
3526 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3527 {
3528         if (!sclp.has_ibs)
3529                 return;
3530         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3531         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3532 }
3533
3534 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3535 {
3536         int i, online_vcpus, started_vcpus = 0;
3537
3538         if (!is_vcpu_stopped(vcpu))
3539                 return;
3540
3541         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3542         /* Only one cpu at a time may enter/leave the STOPPED state. */
3543         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3544         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3545
3546         for (i = 0; i < online_vcpus; i++) {
3547                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3548                         started_vcpus++;
3549         }
3550
3551         if (started_vcpus == 0) {
3552                 /* we're the only active VCPU -> speed it up */
3553                 __enable_ibs_on_vcpu(vcpu);
3554         } else if (started_vcpus == 1) {
3555                 /*
3556                  * As we are starting a second VCPU, we have to disable
3557                  * the IBS facility on all VCPUs to remove potentially
3558                  * outstanding ENABLE requests.
3559                  */
3560                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3561         }
3562
3563         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3564         /*
3565          * Another VCPU might have used IBS while we were offline.
3566          * Let's play safe and flush the VCPU at startup.
3567          */
3568         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3569         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3570         return;
3571 }
3572
3573 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3574 {
3575         int i, online_vcpus, started_vcpus = 0;
3576         struct kvm_vcpu *started_vcpu = NULL;
3577
3578         if (is_vcpu_stopped(vcpu))
3579                 return;
3580
3581         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3582         /* Only one cpu at a time may enter/leave the STOPPED state. */
3583         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3584         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3585
3586         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3587         kvm_s390_clear_stop_irq(vcpu);
3588
3589         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3590         __disable_ibs_on_vcpu(vcpu);
3591
3592         for (i = 0; i < online_vcpus; i++) {
3593                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3594                         started_vcpus++;
3595                         started_vcpu = vcpu->kvm->vcpus[i];
3596                 }
3597         }
3598
3599         if (started_vcpus == 1) {
3600                 /*
3601                  * As we only have one VCPU left, we want to enable the
3602                  * IBS facility for that VCPU to speed it up.
3603                  */
3604                 __enable_ibs_on_vcpu(started_vcpu);
3605         }
3606
3607         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3608         return;
3609 }
3610
3611 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3612                                      struct kvm_enable_cap *cap)
3613 {
3614         int r;
3615
3616         if (cap->flags)
3617                 return -EINVAL;
3618
3619         switch (cap->cap) {
3620         case KVM_CAP_S390_CSS_SUPPORT:
3621                 if (!vcpu->kvm->arch.css_support) {
3622                         vcpu->kvm->arch.css_support = 1;
3623                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3624                         trace_kvm_s390_enable_css(vcpu->kvm);
3625                 }
3626                 r = 0;
3627                 break;
3628         default:
3629                 r = -EINVAL;
3630                 break;
3631         }
3632         return r;
3633 }
3634
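/*
 * Back end of the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical
 * memory, bouncing the data through a temporary kernel buffer. A
 * hypothetical userspace read of 512 bytes (error handling omitted) would
 * look like:
 *
 *        struct kvm_s390_mem_op mop = {
 *                .gaddr = guest_addr,
 *                .size  = 512,
 *                .op    = KVM_S390_MEMOP_LOGICAL_READ,
 *                .buf   = (__u64)(unsigned long)local_buf,
 *                .ar    = 0,
 *        };
 *        ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */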
3635 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3636                                   struct kvm_s390_mem_op *mop)
3637 {
3638         void __user *uaddr = (void __user *)mop->buf;
3639         void *tmpbuf = NULL;
3640         int r, srcu_idx;
3641         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3642                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3643
3644         if (mop->flags & ~supported_flags)
3645                 return -EINVAL;
3646
3647         if (mop->size > MEM_OP_MAX_SIZE)
3648                 return -E2BIG;
3649
3650         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3651                 tmpbuf = vmalloc(mop->size);
3652                 if (!tmpbuf)
3653                         return -ENOMEM;
3654         }
3655
3656         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3657
3658         switch (mop->op) {
3659         case KVM_S390_MEMOP_LOGICAL_READ:
3660                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3661                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3662                                             mop->size, GACC_FETCH);
3663                         break;
3664                 }
3665                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3666                 if (r == 0) {
3667                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3668                                 r = -EFAULT;
3669                 }
3670                 break;
3671         case KVM_S390_MEMOP_LOGICAL_WRITE:
3672                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3673                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3674                                             mop->size, GACC_STORE);
3675                         break;
3676                 }
3677                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3678                         r = -EFAULT;
3679                         break;
3680                 }
3681                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3682                 break;
3683         default:
3684                 r = -EINVAL;
3685         }
3686
3687         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3688
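        /*
         * A positive return value from the guest access functions is a
         * program interruption code; inject it if the caller requested
         * exception injection.
         */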
3689         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3690                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3691
3692         vfree(tmpbuf);
3693         return r;
3694 }
3695
3696 long kvm_arch_vcpu_ioctl(struct file *filp,
3697                          unsigned int ioctl, unsigned long arg)
3698 {
3699         struct kvm_vcpu *vcpu = filp->private_data;
3700         void __user *argp = (void __user *)arg;
3701         int idx;
3702         long r;
3703
3704         switch (ioctl) {
3705         case KVM_S390_IRQ: {
3706                 struct kvm_s390_irq s390irq;
3707
3708                 r = -EFAULT;
3709                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3710                         break;
3711                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3712                 break;
3713         }
3714         case KVM_S390_INTERRUPT: {
3715                 struct kvm_s390_interrupt s390int;
3716                 struct kvm_s390_irq s390irq;
3717
3718                 r = -EFAULT;
3719                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3720                         break;
3721                 if (s390int_to_s390irq(&s390int, &s390irq))
3722                         return -EINVAL;
3723                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3724                 break;
3725         }
3726         case KVM_S390_STORE_STATUS:
3727                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3728                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3729                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3730                 break;
3731         case KVM_S390_SET_INITIAL_PSW: {
3732                 psw_t psw;
3733
3734                 r = -EFAULT;
3735                 if (copy_from_user(&psw, argp, sizeof(psw)))
3736                         break;
3737                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3738                 break;
3739         }
3740         case KVM_S390_INITIAL_RESET:
3741                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3742                 break;
3743         case KVM_SET_ONE_REG:
3744         case KVM_GET_ONE_REG: {
3745                 struct kvm_one_reg reg;
3746                 r = -EFAULT;
3747                 if (copy_from_user(&reg, argp, sizeof(reg)))
3748                         break;
3749                 if (ioctl == KVM_SET_ONE_REG)
3750                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3751                 else
3752                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3753                 break;
3754         }
3755 #ifdef CONFIG_KVM_S390_UCONTROL
3756         case KVM_S390_UCAS_MAP: {
3757                 struct kvm_s390_ucas_mapping ucasmap;
3758
3759                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3760                         r = -EFAULT;
3761                         break;
3762                 }
3763
3764                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3765                         r = -EINVAL;
3766                         break;
3767                 }
3768
3769                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3770                                      ucasmap.vcpu_addr, ucasmap.length);
3771                 break;
3772         }
3773         case KVM_S390_UCAS_UNMAP: {
3774                 struct kvm_s390_ucas_mapping ucasmap;
3775
3776                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3777                         r = -EFAULT;
3778                         break;
3779                 }
3780
3781                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3782                         r = -EINVAL;
3783                         break;
3784                 }
3785
3786                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3787                         ucasmap.length);
3788                 break;
3789         }
3790 #endif
3791         case KVM_S390_VCPU_FAULT: {
3792                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3793                 break;
3794         }
3795         case KVM_ENABLE_CAP:
3796         {
3797                 struct kvm_enable_cap cap;
3798                 r = -EFAULT;
3799                 if (copy_from_user(&cap, argp, sizeof(cap)))
3800                         break;
3801                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3802                 break;
3803         }
3804         case KVM_S390_MEM_OP: {
3805                 struct kvm_s390_mem_op mem_op;
3806
3807                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3808                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3809                 else
3810                         r = -EFAULT;
3811                 break;
3812         }
3813         case KVM_S390_SET_IRQ_STATE: {
3814                 struct kvm_s390_irq_state irq_state;
3815
3816                 r = -EFAULT;
3817                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3818                         break;
3819                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3820                     irq_state.len == 0 ||
3821                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3822                         r = -EINVAL;
3823                         break;
3824                 }
3825                 /* do not use irq_state.flags; it would break old QEMUs */
3826                 r = kvm_s390_set_irq_state(vcpu,
3827                                            (void __user *) irq_state.buf,
3828                                            irq_state.len);
3829                 break;
3830         }
3831         case KVM_S390_GET_IRQ_STATE: {
3832                 struct kvm_s390_irq_state irq_state;
3833
3834                 r = -EFAULT;
3835                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3836                         break;
3837                 if (irq_state.len == 0) {
3838                         r = -EINVAL;
3839                         break;
3840                 }
3841                 /* do not use irq_state.flags; it would break old QEMUs */
3842                 r = kvm_s390_get_irq_state(vcpu,
3843                                            (__u8 __user *)  irq_state.buf,
3844                                            irq_state.len);
3845                 break;
3846         }
3847         default:
3848                 r = -ENOTTY;
3849         }
3850         return r;
3851 }
3852
3853 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3854 {
3855 #ifdef CONFIG_KVM_S390_UCONTROL
3856         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3857                  && (kvm_is_ucontrol(vcpu->kvm))) {
3858                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3859                 get_page(vmf->page);
3860                 return 0;
3861         }
3862 #endif
3863         return VM_FAULT_SIGBUS;
3864 }
3865
3866 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3867                             unsigned long npages)
3868 {
3869         return 0;
3870 }
3871
3872 /* Section: memory related */
3873 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3874                                    struct kvm_memory_slot *memslot,
3875                                    const struct kvm_userspace_memory_region *mem,
3876                                    enum kvm_mr_change change)
3877 {
3878         /* A few sanity checks. Memory slots must start and end at a segment
3879            boundary (1 MB). The memory in userland may be fragmented into
3880            various different vmas. It is okay to mmap() and munmap() in this
3881            slot at any time after doing this call */
3882
3883         if (mem->userspace_addr & 0xffffful)
3884                 return -EINVAL;
3885
3886         if (mem->memory_size & 0xffffful)
3887                 return -EINVAL;
3888
3889         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3890                 return -EINVAL;
3891
3892         return 0;
3893 }
3894
3895 void kvm_arch_commit_memory_region(struct kvm *kvm,
3896                                 const struct kvm_userspace_memory_region *mem,
3897                                 const struct kvm_memory_slot *old,
3898                                 const struct kvm_memory_slot *new,
3899                                 enum kvm_mr_change change)
3900 {
3901         int rc;
3902
3903         /* If the basics of the memslot do not change, we do not want
3904          * to update the gmap. Every update causes several unnecessary
3905          * segment translation exceptions. This is usually handled just
3906          * fine by the normal fault handler + gmap, but it will also
3907          * cause faults on the prefix page of running guest CPUs.
3908          */
3909         if (old->userspace_addr == mem->userspace_addr &&
3910             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3911             old->npages * PAGE_SIZE == mem->memory_size)
3912                 return;
3913
3914         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3915                 mem->guest_phys_addr, mem->memory_size);
3916         if (rc)
3917                 pr_warn("failed to commit memory region\n");
3918         return;
3919 }
3920
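/*
 * Derive, from the two hmfai bits that describe facility block i, a mask
 * of the facility bits in that block which may be passed through to the
 * guest (the name suggests: not reserved for the hypervisor). Used below
 * when building kvm_s390_fac_list_mask from the host STFLE list.
 */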
3921 static inline unsigned long nonhyp_mask(int i)
3922 {
3923         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3924
3925         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3926 }
3927
3928 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3929 {
3930         vcpu->valid_wakeup = false;
3931 }
3932
3933 static int __init kvm_s390_init(void)
3934 {
3935         int i;
3936
3937         if (!sclp.has_sief2) {
3938                 pr_info("SIE not available\n");
3939                 return -ENODEV;
3940         }
3941
3942         for (i = 0; i < 16; i++)
3943                 kvm_s390_fac_list_mask[i] |=
3944                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3945
3946         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3947 }
3948
3949 static void __exit kvm_s390_exit(void)
3950 {
3951         kvm_exit();
3952 }
3953
3954 module_init(kvm_s390_init);
3955 module_exit(kvm_s390_exit);
3956
3957 /*
3958  * Enable autoloading of the kvm module.
3959  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3960  * since x86 takes a different approach.
3961  */
3962 #include <linux/miscdevice.h>
3963 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3964 MODULE_ALIAS("devname:kvm");