// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <[email protected]>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);

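/*
 * PASID to private data (struct intel_svm) mapping. xa_alloc() with
 * XA_LIMIT(pasid, pasid) stores @priv at exactly index @pasid.
 */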
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
        return xa_alloc(&pasid_private_array, &pasid, priv,
                        XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
        xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
        return xa_load(&pasid_private_array, pasid);
}

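/* Find the intel_svm_dev bound to @dev in @svm's RCU-protected device list. */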
static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
        struct intel_svm_dev *sdev = NULL, *t;

        rcu_read_lock();
        list_for_each_entry_rcu(t, &svm->devs, list) {
                if (t->dev == dev) {
                        sdev = t;
                        break;
                }
        }
        rcu_read_unlock();

        return sdev;
}

int intel_svm_enable_prq(struct intel_iommu *iommu)
{
        struct iopf_queue *iopfq;
        struct page *pages;
        int irq, ret;

        pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
        if (!pages) {
                pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
                        iommu->name);
                return -ENOMEM;
        }
        iommu->prq = page_address(pages);

        irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
        if (irq <= 0) {
                pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
                       iommu->name);
                ret = -EINVAL;
                goto free_prq;
        }
        iommu->pr_irq = irq;

        snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
                 "dmar%d-iopfq", iommu->seq_id);
        iopfq = iopf_queue_alloc(iommu->iopfq_name);
        if (!iopfq) {
                pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
                ret = -ENOMEM;
                goto free_hwirq;
        }
        iommu->iopf_queue = iopfq;

        snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

        ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
                                   iommu->prq_name, iommu);
        if (ret) {
                pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
                       iommu->name);
                goto free_iopfq;
        }
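        /*
         * Program the page request queue: clear the head and tail pointers
         * and point DMAR_PQA_REG at the queue, with the queue size order
         * (PRQ_ORDER) encoded in the low bits.
         */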
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

        init_completion(&iommu->prq_complete);

        return 0;

free_iopfq:
        iopf_queue_free(iommu->iopf_queue);
        iommu->iopf_queue = NULL;
free_hwirq:
        dmar_free_hwirq(irq);
        iommu->pr_irq = 0;
free_prq:
        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

        if (iommu->pr_irq) {
                free_irq(iommu->pr_irq, iommu);
                dmar_free_hwirq(iommu->pr_irq);
                iommu->pr_irq = 0;
        }

        if (iommu->iopf_queue) {
                iopf_queue_free(iommu->iopf_queue);
                iommu->iopf_queue = NULL;
        }

        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return 0;
}

void intel_svm_check(struct intel_iommu *iommu)
{
        if (!pasid_supported(iommu))
                return;

        if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
            !cap_fl1gp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible 1GB page capability\n",
                       iommu->name);
                return;
        }

        if (cpu_feature_enabled(X86_FEATURE_LA57) &&
            !cap_fl5lp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible paging mode\n",
                       iommu->name);
                return;
        }

        iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
                                  struct intel_svm_dev *sdev,
                                  unsigned long address,
                                  unsigned long pages, int ih)
{
        struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

        if (WARN_ON(!pages))
                return;

        qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
        if (info->ats_enabled) {
                qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
                                         svm->pasid, sdev->qdep, address,
                                         order_base_2(pages));
                quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
                                          svm->pasid, sdev->qdep);
        }
}

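/*
 * The invalidation descriptors describe a naturally aligned 2^N page region,
 * so round the requested range up to the next power-of-two size and flush it
 * in aligned chunks.
 */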
static void intel_flush_svm_range_dev(struct intel_svm *svm,
                                      struct intel_svm_dev *sdev,
                                      unsigned long address,
                                      unsigned long pages, int ih)
{
        unsigned long shift = ilog2(__roundup_pow_of_two(pages));
        unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
        unsigned long start = ALIGN_DOWN(address, align);
        unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

        while (start < end) {
                __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
                start += align;
        }
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
                                unsigned long pages, int ih)
{
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
        rcu_read_unlock();
}

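/* Flush the entire address space for @svm->pasid on every bound device. */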
static void intel_flush_svm_all(struct intel_svm *svm)
{
        struct device_domain_info *info;
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list) {
                info = dev_iommu_priv_get(sdev->dev);

                qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
                if (info->ats_enabled) {
                        qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
                                                 svm->pasid, sdev->qdep,
                                                 0, 64 - VTD_PAGE_SHIFT);
                        quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
                                                  svm->pasid, sdev->qdep);
                }
        }
        rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        if (start == 0 && end == -1UL) {
                intel_flush_svm_all(svm);
                return;
        }

        intel_flush_svm_range(svm, start,
                              (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
        struct intel_svm_dev *sdev;

        /* This might end up being called from exit_mmap(), *before* the page
         * tables are cleared. And __mmu_notifier_release() will delete us from
         * the list of notifiers so that our arch_invalidate_secondary_tlbs()
         * callback doesn't get called when the page tables are cleared. So we
         * need to protect against hardware accessing those page tables.
         *
         * We do it by clearing the entry in the PASID table and then flushing
         * the IOTLB and the PASID table caches. This might upset hardware;
         * perhaps we'll want to point the PASID to a dummy PGD (like the zero
         * page) so that we end up taking a fault that the hardware really
         * *has* to handle gracefully without affecting other processes.
         */
        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
                                            svm->pasid, true);
        rcu_read_unlock();
}

static const struct mmu_notifier_ops intel_mmuops = {
        .release = intel_mm_release,
        .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
};

static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
                             struct intel_svm **rsvm,
                             struct intel_svm_dev **rsdev)
{
        struct intel_svm_dev *sdev = NULL;
        struct intel_svm *svm;

        if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
                return -EINVAL;

        svm = pasid_private_find(pasid);
        if (IS_ERR(svm))
                return PTR_ERR(svm);

        if (!svm)
                goto out;

        /*
         * If we found svm for the PASID, there must be at least one device
         * bond.
         */
        if (WARN_ON(list_empty(&svm->devs)))
                return -EINVAL;
        sdev = svm_lookup_device_by_dev(svm, dev);

out:
        *rsvm = svm;
        *rsdev = sdev;

        return 0;
}

static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
                             struct iommu_domain *domain, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct mm_struct *mm = domain->mm;
        struct intel_svm_dev *sdev;
        struct intel_svm *svm;
        unsigned long sflags;
        int ret = 0;

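        /*
         * One intel_svm per mm/PASID: create it on the first bind and register
         * the MMU notifier so that CPU TLB invalidations are mirrored to the
         * IOTLB and the device TLBs of all bound devices.
         */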
        svm = pasid_private_find(pasid);
        if (!svm) {
                svm = kzalloc(sizeof(*svm), GFP_KERNEL);
                if (!svm)
                        return -ENOMEM;

                svm->pasid = pasid;
                svm->mm = mm;
                INIT_LIST_HEAD_RCU(&svm->devs);

                svm->notifier.ops = &intel_mmuops;
                ret = mmu_notifier_register(&svm->notifier, mm);
                if (ret) {
                        kfree(svm);
                        return ret;
                }

                ret = pasid_private_add(svm->pasid, svm);
                if (ret) {
                        mmu_notifier_unregister(&svm->notifier, mm);
                        kfree(svm);
                        return ret;
                }
        }

        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev) {
                ret = -ENOMEM;
                goto free_svm;
        }

        sdev->dev = dev;
        sdev->iommu = iommu;
        sdev->did = FLPT_DEFAULT_DID;
        sdev->sid = PCI_DEVID(info->bus, info->devfn);
        init_rcu_head(&sdev->rcu);
        if (info->ats_enabled) {
                sdev->qdep = info->ats_qdep;
                if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
                        sdev->qdep = 0;
        }

        /* Setup the pasid table: */
        sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
        ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
                                            FLPT_DEFAULT_DID, sflags);
        if (ret)
                goto free_sdev;

        list_add_rcu(&sdev->list, &svm->devs);

        return 0;

free_sdev:
        kfree(sdev);
free_svm:
        if (list_empty(&svm->devs)) {
                mmu_notifier_unregister(&svm->notifier, mm);
                pasid_private_remove(pasid);
                kfree(svm);
        }

        return ret;
}

void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
{
        struct intel_svm_dev *sdev;
        struct intel_svm *svm;
        struct mm_struct *mm;

        if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
                return;
        mm = svm->mm;

        if (sdev) {
                list_del_rcu(&sdev->list);
                kfree_rcu(sdev, rcu);

                if (list_empty(&svm->devs)) {
                        if (svm->notifier.ops)
                                mmu_notifier_unregister(&svm->notifier, mm);
                        pasid_private_remove(svm->pasid);
                        /*
                         * We mandate that no page faults may be outstanding
                         * for the PASID when intel_svm_remove_dev_pasid() is
                         * called. If that is not obeyed, subtle errors will
                         * happen. Let's make them less subtle...
                         */
                        memset(svm, 0x6b, sizeof(*svm));
                        kfree(svm);
                }
        }
}

/* Page request queue descriptor */
struct page_req_dsc {
        union {
                struct {
                        u64 type:8;
                        u64 pasid_present:1;
                        u64 priv_data_present:1;
                        u64 rsvd:6;
                        u64 rid:16;
                        u64 pasid:20;
                        u64 exe_req:1;
                        u64 pm_req:1;
                        u64 rsvd2:10;
                };
                u64 qw_0;
        };
        union {
                struct {
                        u64 rd_req:1;
                        u64 wr_req:1;
                        u64 lpig:1;
                        u64 prg_index:9;
                        u64 addr:52;
                };
                u64 qw_1;
        };
        u64 priv_data[2];
};

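/*
 * A canonical address sign-extends bit __VIRTUAL_MASK_SHIFT into all of the
 * higher bits, so shifting the value up and arithmetic-shifting it back down
 * must reproduce the original value.
 */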
static bool is_canonical_address(u64 addr)
{
        int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
        long saddr = (long) addr;

        return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_drain_pasid_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then it follows the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
void intel_drain_pasid_prq(struct device *dev, u32 pasid)
{
        struct device_domain_info *info;
        struct dmar_domain *domain;
        struct intel_iommu *iommu;
        struct qi_desc desc[3];
        struct pci_dev *pdev;
        int head, tail;
        u16 sid, did;
        int qdep;

        info = dev_iommu_priv_get(dev);
        if (WARN_ON(!info || !dev_is_pci(dev)))
                return;

        if (!info->pri_enabled)
                return;

        iommu = info->iommu;
        domain = info->domain;
        pdev = to_pci_dev(dev);
        sid = PCI_DEVID(info->bus, info->devfn);
        did = domain_id_iommu(domain, iommu);
        qdep = pci_ats_queue_depth(pdev);

        /*
         * Check and wait until all pending page requests in the queue are
         * handled by the prq handling thread.
         */
prq_retry:
        reinit_completion(&iommu->prq_complete);
        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        while (head != tail) {
                struct page_req_dsc *req;

                req = &iommu->prq[head / sizeof(*req)];
                if (!req->pasid_present || req->pasid != pasid) {
                        head = (head + sizeof(*req)) & PRQ_RING_MASK;
                        continue;
                }

                wait_for_completion(&iommu->prq_complete);
                goto prq_retry;
        }

        iopf_queue_flush_dev(dev);

        /*
         * Perform steps described in VT-d spec CH7.10 to drain page
         * requests and responses in hardware.
         */
        memset(desc, 0, sizeof(desc));
        desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_FENCE |
                        QI_IWD_TYPE;
        desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
                        QI_EIOTLB_DID(did) |
                        QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
                        QI_EIOTLB_TYPE;
        desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
                        QI_DEV_EIOTLB_SID(sid) |
                        QI_DEV_EIOTLB_QDEP(qdep) |
                        QI_DEIOTLB_TYPE |
                        QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
        reinit_completion(&iommu->prq_complete);
        qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                wait_for_completion(&iommu->prq_complete);
                goto qi_retry;
        }
}

static int prq_to_iommu_prot(struct page_req_dsc *req)
{
        int prot = 0;

        if (req->rd_req)
                prot |= IOMMU_FAULT_PERM_READ;
        if (req->wr_req)
                prot |= IOMMU_FAULT_PERM_WRITE;
        if (req->exe_req)
                prot |= IOMMU_FAULT_PERM_EXEC;
        if (req->pm_req)
                prot |= IOMMU_FAULT_PERM_PRIV;

        return prot;
}

static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
                                struct page_req_dsc *desc)
{
        struct iommu_fault_event event;

        if (!dev || !dev_is_pci(dev))
                return -ENODEV;

        /* Fill in event data for device specific processing */
        memset(&event, 0, sizeof(struct iommu_fault_event));
        event.fault.type = IOMMU_FAULT_PAGE_REQ;
        event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
        event.fault.prm.pasid = desc->pasid;
        event.fault.prm.grpid = desc->prg_index;
        event.fault.prm.perm = prq_to_iommu_prot(desc);

        if (desc->lpig)
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
        if (desc->pasid_present) {
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
        }
        if (desc->priv_data_present) {
                /*
                 * If private data is present, a page response is required just
                 * as it is for LPIG, so set the last page in group bit.
                 * iommu_report_device_fault() doesn't understand this vendor
                 * specific requirement, thus we set last_page as a workaround.
                 */
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
                event.fault.prm.private_data[0] = desc->priv_data[0];
                event.fault.prm.private_data[1] = desc->priv_data[1];
        } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
                /*
                 * If the private data fields are not used by hardware, use
                 * them to monitor the prq handling latency.
                 */
                event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
        }

        return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
                                 struct page_req_dsc *req, int result)
{
        struct qi_desc desc;

        pr_err("%s: Invalid page request: %08llx %08llx\n",
               iommu->name, ((unsigned long long *)req)[0],
               ((unsigned long long *)req)[1]);

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must
         * respond with page group response if private data
         * is present (PDP) or last page in group (LPIG) bit
         * is set. This is an additional VT-d feature beyond
         * PCI ATS spec.
         */
        if (!req->lpig && !req->priv_data_present)
                return;

        desc.qw0 = QI_PGRP_PASID(req->pasid) |
                        QI_PGRP_DID(req->rid) |
                        QI_PGRP_PASID_P(req->pasid_present) |
                        QI_PGRP_PDP(req->priv_data_present) |
                        QI_PGRP_RESP_CODE(result) |
                        QI_PGRP_RESP_TYPE;
        desc.qw1 = QI_PGRP_IDX(req->prg_index) |
                        QI_PGRP_LPIG(req->lpig);

        if (req->priv_data_present) {
                desc.qw2 = req->priv_data[0];
                desc.qw3 = req->priv_data[1];
        } else {
                desc.qw2 = 0;
                desc.qw3 = 0;
        }

        qi_submit_sync(iommu, &desc, 1, 0);
}

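/*
 * IRQ thread for the page request queue: validate each descriptor, report it
 * to the IOMMU fault framework, and answer malformed requests directly.
 */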
static irqreturn_t prq_event_thread(int irq, void *d)
{
        struct intel_iommu *iommu = d;
        struct page_req_dsc *req;
        int head, tail, handled;
        struct pci_dev *pdev;
        u64 address;

        /*
         * Clear PPR bit before reading head/tail registers, to ensure that
         * we get a new interrupt if needed.
         */
        writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        handled = (head != tail);
        while (head != tail) {
                req = &iommu->prq[head / sizeof(*req)];
                address = (u64)req->addr << VTD_PAGE_SHIFT;

                if (unlikely(!req->pasid_present)) {
                        pr_err("IOMMU: %s: Page request without PASID\n",
                               iommu->name);
bad_req:
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                        goto prq_advance;
                }

                if (unlikely(!is_canonical_address(address))) {
                        pr_err("IOMMU: %s: Address is not canonical\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
                        pr_err("IOMMU: %s: Page request in Privilege Mode\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->exe_req && req->rd_req)) {
                        pr_err("IOMMU: %s: Execution request not supported\n",
                               iommu->name);
                        goto bad_req;
                }

                /* Drop Stop Marker message. No need for a response. */
                if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
                        goto prq_advance;

                pdev = pci_get_domain_bus_and_slot(iommu->segment,
                                                   PCI_BUS_NUM(req->rid),
                                                   req->rid & 0xff);
                /*
                 * If the prq is to be handled outside the iommu driver via a
                 * receiver of the fault notifiers, we skip the page response
                 * here.
                 */
                if (!pdev)
                        goto bad_req;

                if (intel_svm_prq_report(iommu, &pdev->dev, req))
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                else
                        trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
                                         req->priv_data[0], req->priv_data[1],
                                         iommu->prq_seq_number++);
                pci_dev_put(pdev);
prq_advance:
                head = (head + sizeof(*req)) & PRQ_RING_MASK;
        }

        dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

        /*
         * Clear the page request overflow bit and wake up all threads that
         * are waiting for the completion of this handling.
         */
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
                                    iommu->name);
                head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
                tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
                if (head == tail) {
                        iopf_queue_discard_partial(iommu->iopf_queue);
                        writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
                        pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
                                            iommu->name);
                }
        }

        if (!completion_done(&iommu->prq_complete))
                complete(&iommu->prq_complete);

        return IRQ_RETVAL(handled);
}

int intel_svm_page_response(struct device *dev,
                            struct iommu_fault_event *evt,
                            struct iommu_page_response *msg)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;
        u8 bus = info->bus, devfn = info->devfn;
        struct iommu_fault_page_request *prm;
        bool private_present;
        bool pasid_present;
        bool last_page;
        int ret = 0;
        u16 sid;

        prm = &evt->fault.prm;
        sid = PCI_DEVID(bus, devfn);
        pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
        private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
        last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

        if (!pasid_present) {
                ret = -EINVAL;
                goto out;
        }

        if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
                ret = -EINVAL;
                goto out;
        }

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must respond
         * with page group response if private data is present (PDP)
         * or last page in group (LPIG) bit is set. This is an
         * additional VT-d requirement beyond PCI ATS spec.
         */
        if (last_page || private_present) {
                struct qi_desc desc;

                desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
                                QI_PGRP_PASID_P(pasid_present) |
                                QI_PGRP_PDP(private_present) |
                                QI_PGRP_RESP_CODE(msg->code) |
                                QI_PGRP_RESP_TYPE;
                desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
                desc.qw2 = 0;
                desc.qw3 = 0;

                if (private_present) {
                        desc.qw2 = prm->private_data[0];
                        desc.qw3 = prm->private_data[1];
                } else if (prm->private_data[0]) {
                        dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
                                ktime_to_ns(ktime_get()) - prm->private_data[0]);
                }

                qi_submit_sync(iommu, &desc, 1, 0);
        }
out:
        return ret;
}

static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
                                   struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;

        return intel_svm_bind_mm(iommu, dev, domain, pasid);
}

static void intel_svm_domain_free(struct iommu_domain *domain)
{
        kfree(to_dmar_domain(domain));
}

static const struct iommu_domain_ops intel_svm_domain_ops = {
        .set_dev_pasid          = intel_svm_set_dev_pasid,
        .free                   = intel_svm_domain_free
};

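/*
 * Allocate an SVA domain. Only the domain ops are filled in here; the caller
 * is expected to set domain->mm before a PASID is attached, since
 * intel_svm_bind_mm() reads it.
 */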
struct iommu_domain *intel_svm_domain_alloc(void)
{
        struct dmar_domain *domain;

        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;
        domain->domain.ops = &intel_svm_domain_ops;

        return &domain->domain;
}