* hosting zSeries kernel virtual machines
* Copyright IBM Corp. 2008, 2009
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
* as published by the Free Software Foundation.
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#define CREATE_TRACE_POINTS
#include "trace-s390.h"
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
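/*
 * Per-vcpu statistics exported through debugfs. Each entry maps a file
 * name to the offset of a counter in struct kvm_vcpu's stat area, so the
 * generic KVM debugfs code can expose exit and instruction counts.
 */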
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "userspace_handled", VCPU_STAT(exit_userspace) },
{ "exit_null", VCPU_STAT(exit_null) },
{ "exit_validity", VCPU_STAT(exit_validity) },
{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
{ "exit_external_request", VCPU_STAT(exit_external_request) },
{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
{ "diagnose_10", VCPU_STAT(diagnose_10) },
{ "diagnose_44", VCPU_STAT(diagnose_44) },
{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
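/*
 * vfacilities points to a zeroed page holding the subset of the host's
 * STFLE facility bits that KVM reports to its guests; it is filled in and
 * masked in kvm_s390_init() below.
 */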
unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;
/* test availability of vfacility */
static inline int test_vfacility(unsigned long nr)
return __test_facility(nr, (void *) vfacilities);
/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
/* every s390 is virtualization enabled ;-) */
void kvm_arch_hardware_disable(void *garbage)
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
int kvm_arch_hardware_setup(void)
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_ipte_notifier(&gmap_notifier);
void kvm_arch_hardware_unsetup(void)
gmap_unregister_ipte_notifier(&gmap_notifier);
void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_init(void *opaque)
void kvm_arch_exit(void)
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
if (ioctl == KVM_S390_ENABLE_SIE)
return s390_enable_sie();
int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_S390_PSW:
case KVM_CAP_S390_GMAP:
case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
case KVM_CAP_ASYNC_PF:
case KVM_CAP_SYNC_REGS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
/* Section: vm related */
* Get (and clear) the dirty memory log for a memory slot.
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
case KVM_CAP_S390_IRQCHIP:
kvm->arch.use_irqchip = 1;
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
if (copy_from_user(&s390int, argp, sizeof(s390int)))
r = kvm_s390_inject_vm(kvm, &s390int);
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;
if (copy_from_user(&cap, argp, sizeof(cap)))
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
case KVM_CREATE_IRQCHIP: {
struct kvm_irq_routing_entry routing;
if (kvm->arch.use_irqchip) {
/* Set up dummy routing. */
memset(&routing, 0, sizeof(routing));
kvm_set_irq_routing(kvm, &routing, 0, 0);
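/*
 * VM creation: enable SIE for the host mm, allocate the system control
 * area (SCA), register an s390 debug feature log, initialize the floating
 * interrupt list and, for non-ucontrol guests, allocate the gmap that
 * backs the guest address space.
 */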
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
static unsigned long sca_offset;
#ifdef CONFIG_KVM_S390_UCONTROL
if (type & ~KVM_VM_S390_UCONTROL)
if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
rc = s390_enable_sie();
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
spin_lock(&kvm_lock);
sca_offset = (sca_offset + 16) & 0x7f0;
kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
spin_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
spin_lock_init(&kvm->arch.float_int.lock);
INIT_LIST_HEAD(&kvm->arch.float_int.list);
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
kvm->arch.gmap = gmap_alloc(current->mm);
kvm->arch.gmap->private = kvm;
kvm->arch.gmap->pfault_enabled = 0;
kvm->arch.css_support = 0;
kvm->arch.use_irqchip = 0;
debug_unregister(kvm->arch.dbf);
free_page((unsigned long)(kvm->arch.sca));
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id,
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
(__u64) vcpu->arch.sie_block)
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);
if (vcpu->arch.sie_block->cbrlo)
__free_page(__pfn_to_page(
vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
static void kvm_free_vcpus(struct kvm *kvm)
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_destroy(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;
atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);
void kvm_arch_sync_events(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = gmap_alloc(current->mm);
if (!vcpu->arch.gmap)
vcpu->arch.gmap->private = vcpu->kvm;
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
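/*
 * vcpu load/put switch the FP and access registers lazily between host
 * and guest values and enable/disable the vcpu's gmap, instead of doing
 * this on every SIE entry and exit.
 */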
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
save_fp_regs(vcpu->arch.host_fpregs.fprs);
save_access_regs(vcpu->arch.host_acrs);
restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
save_fp_regs(vcpu->arch.guest_fpregs.fprs);
save_access_regs(vcpu->run->s.regs.acrs);
restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
restore_fp_regs(vcpu->arch.host_fpregs.fprs);
restore_access_regs(vcpu->arch.host_acrs);
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
/* this equals initial cpu reset in pop, but we don't switch to ESA */
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
kvm_s390_set_prefix(vcpu, 0);
vcpu->arch.sie_block->cputm = 0UL;
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
vcpu->arch.sie_block->gcr[0] = 0xE0UL;
vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
kvm_s390_clear_local_irqs(vcpu);
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
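/*
 * kvm_arch_vcpu_setup wires up the SIE control block: cpuflags, the ecb/eca
 * execution control bits (bit 0x10 in ecb is only set when host facilities
 * 50 and 73 are available), the facility list, an optional CMMA collection
 * block, and the clock comparator hrtimer/tasklet used to wake a waiting vcpu.
 */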
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
vcpu->arch.sie_block->ecb = 6;
if (test_vfacility(50) && test_vfacility(73))
vcpu->arch.sie_block->ecb |= 0x10;
vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002001U;
vcpu->arch.sie_block->fac = (int) (long) vfacilities;
if (kvm_enabled_cmma()) {
cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
vcpu->arch.sie_block->ecb2 |= 0x80;
vcpu->arch.sie_block->ecb2 &= ~0x08;
vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
(unsigned long) vcpu);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;
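/*
 * Allocate the vcpu structure and its sie_page (SIE control block plus
 * itdb), and for non-ucontrol VMs hook the SIE block into the SCA entry
 * for this cpu id and set the corresponding mcn bit.
 */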
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
struct kvm_vcpu *vcpu;
struct sie_page *sie_page;
if (id >= KVM_MAX_VCPUS)
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
vcpu->arch.sie_block->icpua = id;
if (!kvm_is_ucontrol(kvm)) {
if (!kvm->arch.sca) {
if (!kvm->arch.sca->cpu[id].sda)
kvm->arch.sca->cpu[id].sda =
(__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh =
(__u32)(((__u64)kvm->arch.sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
spin_lock_init(&vcpu->arch.local_int.lock);
INIT_LIST_HEAD(&vcpu->arch.local_int.list);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
rc = kvm_vcpu_init(vcpu, kvm, id);
goto out_free_sie_block;
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
free_page((unsigned long)(vcpu->arch.sie_block));
kmem_cache_free(kvm_vcpu_cache, vcpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_cpu_has_interrupt(vcpu);
void s390_vcpu_block(struct kvm_vcpu *vcpu)
atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
* Kick a guest cpu out of SIE and wait until SIE is not running.
* If the CPU is not running (e.g. waiting as idle) the function will
* return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
s390_vcpu_block(vcpu);
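/*
 * gmap IPTE notifier callback: when the host invalidates a mapping that
 * covers a vcpu's prefix pages, request KVM_REQ_MMU_RELOAD for that vcpu
 * so the prefix mapping is re-established (via kvm_s390_handle_requests)
 * before the vcpu reenters SIE.
 */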
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
struct kvm *kvm = gmap->private;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
/* match against both prefix pages */
if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
/* kvm common code refers to this, but never calls it */
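/*
 * KVM_GET_ONE_REG / KVM_SET_ONE_REG: expose selected SIE block and pfault
 * fields (TOD programmable register, epoch difference, CPU timer, clock
 * comparator, pfault token/compare/select, program parameter, gbea) as
 * individual registers copied to or from a user-provided address.
 */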
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
case KVM_REG_S390_TODPR:
r = put_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr);
case KVM_REG_S390_EPOCHDIFF:
r = put_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr);
case KVM_REG_S390_CPU_TIMER:
r = put_user(vcpu->arch.sie_block->cputm,
(u64 __user *)reg->addr);
case KVM_REG_S390_CLOCK_COMP:
r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFTOKEN:
r = put_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFCOMPARE:
r = put_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFSELECT:
r = put_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
case KVM_REG_S390_PP:
r = put_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr);
case KVM_REG_S390_GBEA:
r = put_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr);
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
case KVM_REG_S390_TODPR:
r = get_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr);
case KVM_REG_S390_EPOCHDIFF:
r = get_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr);
case KVM_REG_S390_CPU_TIMER:
r = get_user(vcpu->arch.sie_block->cputm,
(u64 __user *)reg->addr);
case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFTOKEN:
r = get_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFCOMPARE:
r = get_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFSELECT:
r = get_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
case KVM_REG_S390_PP:
r = get_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr);
case KVM_REG_S390_GBEA:
r = get_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr);
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_initial_reset(vcpu);
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
restore_access_regs(vcpu->run->s.regs.acrs);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
if (test_fp_ctl(fpu->fpc))
memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
vcpu->arch.guest_fpregs.fpc = fpu->fpc;
restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
fpu->fpc = vcpu->arch.guest_fpregs.fpc;
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
vcpu->run->psw_mask = psw.mask;
vcpu->run->psw_addr = psw.addr;
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
return -EINVAL; /* not implemented yet */
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
return -EINVAL; /* not implemented yet */
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
return -EINVAL; /* not implemented yet */
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
return -EINVAL; /* not implemented yet */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
* We use MMU_RELOAD just to re-arm the ipte notifier for the
* guest prefix page. gmap_ipte_notify will wait on the ptl lock.
* This ensures that the ipte instruction for this request has
* already finished. We might race against a second unmapper that
* wants to set the blocking bit. Let's just retry the request loop.
while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
rc = gmap_ipte_notify(vcpu->arch.gmap,
vcpu->arch.sie_block->prefix,
s390_vcpu_unblock(vcpu);
static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
struct mm_struct *mm = current->mm;
down_read(&mm->mmap_sem);
rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
struct kvm_s390_interrupt inti;
inti.type = KVM_S390_INT_PFAULT_INIT;
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
inti.type = KVM_S390_INT_PFAULT_DONE;
WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
/* s390 will always inject the page directly */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
* s390 will always inject the page directly,
* but we still want check_async_completion to clean up
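/*
 * Decide whether the current host fault may be handled asynchronously
 * (pfault): requires a valid pfault token, a guest PSW that matches the
 * pfault compare/select masks, external interrupts enabled with the
 * relevant subclass bit (0x200) set in CR0, no other interrupt pending,
 * and pfault handling enabled on the gmap; then read the token from guest
 * memory and queue the async work.
 */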
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
struct kvm_arch_async_pf arch;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
if (psw_extint_disabled(vcpu))
if (kvm_cpu_has_interrupt(vcpu))
if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
if (!vcpu->arch.gmap->pfault_enabled)
hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
* On s390 notifications for arriving pages will be delivered directly
* to the guest, but the housekeeping for completed pfaults is
* handled outside the worker.
kvm_check_async_pf_completion(vcpu);
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
if (test_thread_flag(TIF_MCCK_PENDING))
if (!kvm_is_ucontrol(vcpu->kvm))
kvm_s390_deliver_pending_interrupts(vcpu);
rc = kvm_s390_handle_requests(vcpu);
vcpu->arch.sie_block->icptcode = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
trace_kvm_s390_sie_enter(vcpu, cpuflags);
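/*
 * Post-run handling of the SIE exit: a non-negative exit_reason means a
 * regular interception; for ucontrol guests a fault is reported to
 * userspace as KVM_EXIT_S390_UCONTROL, a gmap pfault is either queued as
 * an async pfault or faulted in synchronously, and anything else is
 * treated as a fault in the SIE instruction itself.
 */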
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
if (exit_reason >= 0) {
} else if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
vcpu->run->s390_ucontrol.trans_exc_code =
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
} else if (current->thread.gmap_pfault) {
trace_kvm_s390_major_guest_pfault(vcpu);
current->thread.gmap_pfault = 0;
if (kvm_arch_setup_async_pf(vcpu) ||
(kvm_arch_fault_in_sync(vcpu) >= 0))
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
if (kvm_is_ucontrol(vcpu->kvm))
/* Don't exit for host interrupts. */
rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
rc = kvm_handle_sie_intercept(vcpu);
bool kvm_enabled_cmma(void)
if (!MACHINE_IS_LPAR)
/* only enable for z10 and later */
if (!MACHINE_HAS_EDAT1)
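/*
 * Main run loop: perform pre-run checks, drop the srcu lock and enter the
 * guest through sie64a(), then reacquire srcu and let vcpu_post_run()
 * evaluate the intercept. The loop continues until a signal is pending or
 * an intercept must be completed in userspace.
 */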
static int __vcpu_run(struct kvm_vcpu *vcpu)
* We try to hold kvm->srcu during most of vcpu_run (except when
* running the guest), so that memslots (and other stuff) are protected
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = vcpu_pre_run(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
* As PF_VCPU will be used in the fault handler, there should be
* no uaccess between guest_enter and guest_exit.
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = vcpu_post_run(vcpu, exit_reason);
} while (!signal_pending(current) && !rc);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
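/*
 * KVM_RUN entry point: restore the guest PSW and any dirty sync regs from
 * kvm_run, run the vcpu, and on the way out copy PSW/prefix/control
 * registers back and, for unhandled intercepts, fill the s390_sieic exit
 * information for userspace.
 */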
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
case KVM_EXIT_UNKNOWN:
case KVM_EXIT_S390_RESET:
case KVM_EXIT_S390_UCONTROL:
case KVM_EXIT_S390_TSCH:
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
rc = __vcpu_run(vcpu);
if (signal_pending(current) && !rc) {
kvm_run->exit_reason = KVM_EXIT_INTR;
if (rc == -EOPNOTSUPP) {
/* intercept cannot be handled in-kernel, prepare kvm-run */
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
if (rc == -EREMOTE) {
/* intercept was handled, but userspace support is needed;
* kvm_run has been prepared by the handler */
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
vcpu->stat.exit_userspace++;
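/*
 * __guestcopy chooses between copy_to_guest() and copy_to_guest_absolute()
 * based on the prefix flag; the store-status code below uses it for every
 * save area field.
 */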
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
unsigned long n, int prefix)
return copy_to_guest(vcpu, guestdest, from, n);
return copy_to_guest_absolute(vcpu, guestdest, from, n);
* store status at address
* we have two special cases:
* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
unsigned char archmode = 1;
if (addr == KVM_S390_STORE_STATUS_NOADDR) {
if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
addr = SAVE_AREA_BASE;
} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
if (copy_to_guest(vcpu, 163ul, &archmode, 1))
addr = SAVE_AREA_BASE;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
vcpu->arch.guest_fpregs.fprs, 128, prefix))
if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
vcpu->run->s.regs.gprs, 128, prefix))
if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
&vcpu->arch.sie_block->gpsw, 16, prefix))
if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
&vcpu->arch.sie_block->prefix, 4, prefix))
if (__guestcopy(vcpu,
addr + offsetof(struct save_area, fp_ctrl_reg),
&vcpu->arch.guest_fpregs.fpc, 4, prefix))
if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
&vcpu->arch.sie_block->todpr, 4, prefix))
if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
&vcpu->arch.sie_block->cputm, 8, prefix))
clkcomp = vcpu->arch.sie_block->ckc >> 8;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
&clkcomp, 8, prefix))
if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
&vcpu->run->s.regs.acrs, 64, prefix))
if (__guestcopy(vcpu,
addr + offsetof(struct save_area, ctrl_regs),
&vcpu->arch.sie_block->gcr, 128, prefix))
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
* The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
* copying in vcpu load/put. Let's update our copies before we save
* them into the save area
save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
save_fp_regs(vcpu->arch.guest_fpregs.fprs);
save_access_regs(vcpu->run->s.regs.acrs);
return kvm_s390_store_status_unloaded(vcpu, addr);
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
case KVM_CAP_S390_CSS_SUPPORT:
if (!vcpu->kvm->arch.css_support) {
vcpu->kvm->arch.css_support = 1;
trace_kvm_s390_enable_css(vcpu->kvm);
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
if (copy_from_user(&s390int, argp, sizeof(s390int)))
r = kvm_s390_inject_vcpu(vcpu, &s390int);
case KVM_S390_STORE_STATUS:
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_s390_vcpu_store_status(vcpu, arg);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
case KVM_S390_SET_INITIAL_PSW: {
if (copy_from_user(&psw, argp, sizeof(psw)))
r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
case KVM_S390_INITIAL_RESET:
r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
if (copy_from_user(&reg, argp, sizeof(reg)))
if (ioctl == KVM_SET_ONE_REG)
r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_S390_UCAS_MAP: {
struct kvm_s390_ucas_mapping ucasmap;
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
if (!kvm_is_ucontrol(vcpu->kvm)) {
r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
ucasmap.vcpu_addr, ucasmap.length);
case KVM_S390_UCAS_UNMAP: {
struct kvm_s390_ucas_mapping ucasmap;
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
if (!kvm_is_ucontrol(vcpu->kvm)) {
r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
case KVM_S390_VCPU_FAULT: {
r = gmap_fault(arg, vcpu->arch.gmap);
if (!IS_ERR_VALUE(r))
case KVM_ENABLE_CAP:
struct kvm_enable_cap cap;
if (copy_from_user(&cap, argp, sizeof(cap)))
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
#ifdef CONFIG_KVM_S390_UCONTROL
if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
&& (kvm_is_ucontrol(vcpu->kvm))) {
vmf->page = virt_to_page(vcpu->arch.sie_block);
get_page(vmf->page);
return VM_FAULT_SIGBUS;
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
void kvm_arch_memslots_updated(struct kvm *kvm)
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
/* A few sanity checks. We can have memory slots which have to be
located/ended at a segment boundary (1MB). The memory in userland is
ok to be fragmented into various different vmas. It is okay to mmap()
and munmap() stuff in this slot after doing this call at any time */
if (mem->userspace_addr & 0xffffful)
if (mem->memory_size & 0xffffful)
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
/* If the basics of the memslot do not change, we do not want
* to update the gmap. Every update causes several unnecessary
* segment translation exceptions. This is usually handled just
* fine by the normal fault handler + gmap, but it will also
* cause faults on the prefix page of running guest CPUs.
if (old->userspace_addr == mem->userspace_addr &&
old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
old->npages * PAGE_SIZE == mem->memory_size)
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
static int __init kvm_s390_init(void)
ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
* guests can ask for up to 255+1 double words, we need a full page
* to hold the maximum amount of facilities. On the other hand, we
* only set facilities that are known to work in KVM.
vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
vfacilities[0] &= 0xff82fff3f4fc2000UL;
vfacilities[1] &= 0x005c000000000000UL;
static void __exit kvm_s390_exit(void)
free_page((unsigned long) vfacilities);
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
* Enable autoloading of the kvm module.
* Note that we add the module alias here instead of virt/kvm/kvm_main.c
* since x86 takes a different approach.
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");