// SPDX-License-Identifier: GPL-2.0
/*
 * s390 kvm PCI passthrough support
 *
 * Copyright IBM Corp. 2022
 */
#include <linux/kvm_host.h>
#include <linux/pci.h>

#include <asm/pci_insn.h>
#include <asm/pci_io.h>
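
/*
 * Host-wide adapter interruption forwarding info: holds the GAIT, the
 * forwarding summary bit vector and the per-summary-bit kvm_zdev pointers,
 * along with the locks protecting them.
 */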
struct zpci_aift *aift;

static inline int __set_irq_noiib(u16 ctl, u8 isc)
	union zpci_sic_iib iib = {{0}};

	return zpci_set_irq_ctrl(ctl, isc, &iib);

void kvm_s390_pci_aen_exit(void)
	struct kvm_zdev **gait_kzdev;

	lockdep_assert_held(&aift->aift_lock);

	/*
	 * Contents of the aipb remain registered for the life of the host
	 * kernel; the information is preserved in zpci_aipb and zpci_aif_sbv
	 * in case the KVM module is inserted again later. Clear the AIFT
	 * information and free anything not registered with the underlying
	 * firmware.
	 */
	spin_lock_irqsave(&aift->gait_lock, flags);
	gait_kzdev = aift->kzdev;

	spin_unlock_irqrestore(&aift->gait_lock, flags);
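
/*
 * Allocate the forwarding summary bit vector and the GAIT, then register
 * them with firmware for adapter event notification interpretation.
 */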
static int zpci_setup_aipb(u8 nisc)
	zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);

	aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);

	zpci_aif_sbv = aift->sbv;
	size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
				    sizeof(struct zpci_gaite)));
	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);

	aift->gait = (struct zpci_gaite *)page_to_virt(page);

	zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
	zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
	zpci_aipb->aipb.afi = nisc;
	zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;

	/* Setup Adapter Event Notification Interpretation */
	if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {

	free_pages((unsigned long)aift->gait, size);

	airq_iv_release(aift->sbv);

static int zpci_reset_aipb(u8 nisc)
	/*
	 * AEN registration can only happen once per system boot. If
	 * an aipb already exists then AEN was already registered and
	 * we can reuse the aipb contents. This can only happen if
	 * the KVM module was removed and re-inserted. However, we must
	 * ensure that the same forwarding ISC is used, as this is assigned
	 * during KVM module load.
	 */
	if (zpci_aipb->aipb.afi != nisc)

	aift->sbv = zpci_aif_sbv;
	aift->gait = phys_to_virt(zpci_aipb->aipb.gait);
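
/*
 * Enable adapter event notification forwarding for the host on the specified
 * forwarding ISC, setting up a new AIPB or reusing one registered earlier.
 */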
int kvm_s390_pci_aen_init(u8 nisc)
	/* If already enabled for AEN, bail out now */
	if (aift->gait || aift->sbv)

	mutex_lock(&aift->aift_lock);
	aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
			      GFP_KERNEL);

	rc = zpci_setup_aipb(nisc);

	rc = zpci_reset_aipb(nisc);

	/* Enable floating IRQs */
	if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
		kvm_s390_pci_aen_exit();

	mutex_unlock(&aift->aift_lock);

/* Modify PCI: Register floating adapter interruption forwarding */
static int kvm_zpci_set_airq(struct zpci_dev *zdev)
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
	struct zpci_fib fib = {};

	fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
	fib.fmt0.sum = 1; /* enable summary notifications */
	fib.fmt0.noi = airq_iv_end(zdev->aibv);
	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);

	fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
	fib.fmt0.aisbo = zdev->aisb & 63;

	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;

/* Modify PCI: Unregister floating adapter interruption forwarding */
static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
	struct zpci_fib fib = {};

	cc = zpci_mod_fc(req, &fib, &status);
	if (cc == 3 || (cc == 1 && status == 24))
		/* Function already gone or IRQs already deregistered. */

	return cc ? -EIO : 0;
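
/*
 * Charge or uncharge pinned guest pages against the pinning user's
 * RLIMIT_MEMLOCK and the current mm's pinned_vm counter.
 */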
static inline void unaccount_mem(unsigned long nr_pages)
	struct user_struct *user = get_uid(current_user());

	atomic_long_sub(nr_pages, &user->locked_vm);

	atomic64_sub(nr_pages, &current->mm->pinned_vm);

static inline int account_mem(unsigned long nr_pages)
	struct user_struct *user = get_uid(current_user());
	unsigned long page_limit, cur_pages, new_pages;

	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	cur_pages = atomic_long_read(&user->locked_vm);

		new_pages = cur_pages + nr_pages;
		if (new_pages > page_limit)
	} while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));

	atomic64_add(nr_pages, &current->mm->pinned_vm);
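
/*
 * Register guest adapter interruption forwarding for the device: pin the
 * guest's AIBV (and AISB, if a summary bit was specified), fill in a GAIT
 * entry and register the resulting host vectors with the device.
 */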
static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,

	struct page *pages[1], *aibv_page, *aisb_page = NULL;
	unsigned int msi_vecs, idx;
	struct zpci_gaite *gaite;
	unsigned long hva, bit;

	int rc = 0, gisc, npages, pcount = 0;

	/*
	 * Interrupt forwarding is only applicable if the device is already
	 * enabled for interpretation
	 */

	kvm = zdev->kzdev->kvm;
	msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);

	/* Get the associated forwarding ISC - if invalid, return the error */
	gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);

	/* Replace AIBV address */
	idx = srcu_read_lock(&kvm->srcu);
	hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
	npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
	srcu_read_unlock(&kvm->srcu, idx);

	aibv_page = pages[0];

	gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
	fib->fmt0.aibv = gaddr;

	/* Pin the guest AISB if one was specified */
	if (fib->fmt0.sum == 1) {
		idx = srcu_read_lock(&kvm->srcu);
		hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
		npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
					     pages);
		srcu_read_unlock(&kvm->srcu, idx);

		aisb_page = pages[0];

	/* Account for pinned pages, roll back on failure */
	if (account_mem(pcount))

	/* AISB must be allocated before we can fill in GAITE */
	mutex_lock(&aift->aift_lock);
	bit = airq_iv_alloc_bit(aift->sbv);

	zdev->aisb = bit; /* store the summary bit number */
	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
				    phys_to_virt(fib->fmt0.aibv));

	spin_lock_irq(&aift->gait_lock);
	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
						   sizeof(struct zpci_gaite));

	/* If assist not requested, host will get all alerts */
		gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);

	gaite->gisc = fib->fmt0.isc;

	gaite->aisbo = fib->fmt0.aisbo;
	gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
							      ~PAGE_MASK));
	aift->kzdev[zdev->aisb] = zdev->kzdev;
	spin_unlock_irq(&aift->gait_lock);

	/* Update guest FIB for re-issue */
	fib->fmt0.aisbo = zdev->aisb & 63;
	fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
	fib->fmt0.isc = gisc;

	/* Save some guest fib values in the host for later use */
	zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
	zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
	mutex_unlock(&aift->aift_lock);

	/* Issue the Modify PCI instruction to set up the IRQ now */
	rc = kvm_zpci_set_airq(zdev);

	mutex_unlock(&aift->aift_lock);

	if (fib->fmt0.sum == 1)
		unpin_user_page(aisb_page);

	unpin_user_page(aibv_page);
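
/*
 * Tear down forwarding for the device: deregister the IRQ, clear the GAIT
 * entry once its use count drops to zero and unpin the guest AIBV/AISB pages.
 */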
static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
	struct kvm_zdev *kzdev = zdev->kzdev;
	struct zpci_gaite *gaite;
	struct page *vpage = NULL, *spage = NULL;

	mutex_lock(&aift->aift_lock);

	/*
	 * If the clear fails due to an error, leave now unless we know this
	 * device is about to go away (force) -- in that case, clear the GAITE
	 * anyway.
	 */
	rc = kvm_zpci_clear_airq(zdev);

	if (zdev->kzdev->fib.fmt0.aibv == 0)

	spin_lock_irq(&aift->gait_lock);
	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
						   sizeof(struct zpci_gaite));

	if (gaite->count == 0) {
		/* Release guest AIBV and AISB */
		vpage = phys_to_page(kzdev->fib.fmt0.aibv);
		if (gaite->aisb != 0)
			spage = phys_to_page(gaite->aisb);
		/* Clear the GAIT entry */

		aift->kzdev[zdev->aisb] = NULL;
		/* Clear zdev info */
		airq_iv_free_bit(aift->sbv, zdev->aisb);
		airq_iv_release(zdev->aibv);

	spin_unlock_irq(&aift->gait_lock);
	kvm_s390_gisc_unregister(kzdev->kvm, isc);
	kzdev->fib.fmt0.isc = 0;
	kzdev->fib.fmt0.aibv = 0;

	unpin_user_page(vpage);

	unpin_user_page(spage);

	unaccount_mem(pcount);

	mutex_unlock(&aift->aift_lock);
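
/* Allocate the kvm_zdev structure that links a zPCI device to a KVM guest */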
static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
	struct kvm_zdev *kzdev;

	kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
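
/* Tear down and free the kvm_zdev structure for a zPCI device */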
static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
	struct kvm_zdev *kzdev;

	WARN_ON(kzdev->zdev != zdev);

/*
 * Register device with the specified KVM. If interpretation facilities are
 * available, enable them and let userspace indicate whether or not they will
 * be used (specify SHM bit to disable).
 */
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
	struct zpci_dev *zdev = opaque;

	mutex_lock(&zdev->kzdev_lock);

	if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
		mutex_unlock(&zdev->kzdev_lock);

	mutex_lock(&kvm->lock);

	rc = kvm_s390_pci_dev_open(zdev);

	/*
	 * If interpretation facilities aren't available, add the device to
	 * the kzdev list but don't enable for interpretation.
	 */
	if (!kvm_s390_pci_interp_allowed())

	/*
	 * If this is the first request to use an interpreted device, make the
	 * necessary vcpu changes
	 */
	if (!kvm->arch.use_zpci_interp)
		kvm_s390_vcpu_pci_enable_interp(kvm);

	if (zdev_enabled(zdev)) {
		rc = zpci_disable_device(zdev);

	/*
	 * Store information about the identity of the kvm guest allowed to
	 * access this device via interpretation to be used by host CLP
	 */
	zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);

	rc = zpci_enable_device(zdev);

	/* Re-register the IOMMU that was already created */
	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				virt_to_phys(zdev->dma_table), &status);

	zdev->kzdev->kvm = kvm;

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
	spin_unlock(&kvm->arch.kzdev_list_lock);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);

	kvm_s390_pci_dev_release(zdev);
	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
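
/*
 * Undo the KVM association: turn off interruption forwarding, clear the
 * GISA designation, re-enable the device for host use and remove it from
 * the guest's device list.
 */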
static void kvm_s390_pci_unregister_kvm(void *opaque)
	struct zpci_dev *zdev = opaque;

	mutex_lock(&zdev->kzdev_lock);

	if (WARN_ON(!zdev->kzdev)) {
		mutex_unlock(&zdev->kzdev_lock);

	kvm = zdev->kzdev->kvm;
	mutex_lock(&kvm->lock);
	/*
	 * A 0 gisa means interpretation was never enabled; just remove the
	 * device from the list.
	 */
	/* Forwarding must be turned off before interpretation */
	if (zdev->kzdev->fib.fmt0.aibv != 0)
		kvm_s390_pci_aif_disable(zdev, true);

	/* Remove the host CLP guest designation */

	if (zdev_enabled(zdev)) {
		if (zpci_disable_device(zdev))

	if (zpci_enable_device(zdev))

	/* Re-register the IOMMU that was already created */
	zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
			   virt_to_phys(zdev->dma_table), &status);

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_del(&zdev->kzdev->entry);
	spin_unlock(&kvm->arch.kzdev_list_lock);
	kvm_s390_pci_dev_release(zdev);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
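
/* Set up the per-guest list used to track passthrough zPCI devices */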
void kvm_s390_pci_init_list(struct kvm *kvm)
	spin_lock_init(&kvm->arch.kzdev_list_lock);
	INIT_LIST_HEAD(&kvm->arch.kzdev_list);

void kvm_s390_pci_clear_list(struct kvm *kvm)
	/*
	 * This list should already be empty, either via vfio device closures
	 */
	spin_lock(&kvm->arch.kzdev_list_lock);
	WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
	spin_unlock(&kvm->arch.kzdev_list_lock);
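
/* Look up a passthrough zPCI device on this guest's list by function handle */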
static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
	struct zpci_dev *zdev = NULL;
	struct kvm_zdev *kzdev;

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
		if (kzdev->zdev->fh == fh) {

	spin_unlock(&kvm->arch.kzdev_list_lock);
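
/* Build a format-0 FIB from the ioctl arguments and enable interruption forwarding */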
static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
				     struct kvm_s390_zpci_op *args)
	struct zpci_fib fib = {};

	fib.fmt0.aibv = args->u.reg_aen.ibv;
	fib.fmt0.isc = args->u.reg_aen.isc;
	fib.fmt0.noi = args->u.reg_aen.noi;
	if (args->u.reg_aen.sb != 0) {
		fib.fmt0.aisb = args->u.reg_aen.sb;
		fib.fmt0.aisbo = args->u.reg_aen.sbo;

	hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
	return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
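
/* Dispatch a KVM_S390_ZPCI_OP ioctl request for a device owned by this guest */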
int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
	struct kvm_zdev *kzdev;
	struct zpci_dev *zdev;

	zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);

	mutex_lock(&zdev->kzdev_lock);
	mutex_lock(&kvm->lock);

	if (kzdev->kvm != kvm) {

	case KVM_S390_ZPCIOP_REG_AEN:
		/* Fail on unknown flags */
		if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {

		r = kvm_s390_pci_zpci_reg_aen(zdev, args);
	case KVM_S390_ZPCIOP_DEREG_AEN:
		r = kvm_s390_pci_aif_disable(zdev, false);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
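
/* Install the zPCI KVM hooks; allocate the AIFT if interpretation is supported */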
int __init kvm_s390_pci_init(void)
	zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
	zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;

	if (!kvm_s390_pci_interp_allowed())

	aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);

	spin_lock_init(&aift->gait_lock);
	mutex_init(&aift->aift_lock);

void kvm_s390_pci_exit(void)
	zpci_kvm_hook.kvm_register = NULL;
	zpci_kvm_hook.kvm_unregister = NULL;

	if (!kvm_s390_pci_interp_allowed())

	mutex_destroy(&aift->aift_lock);