// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <[email protected]>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);

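/*
 * PASID-indexed storage of SVM contexts: each entry maps a PASID to the
 * struct intel_svm that tracks the bound mm and its list of devices.
 */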
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
	return xa_alloc(&pasid_private_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
	xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
	return xa_load(&pasid_private_array, pasid);
}

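/* Find the intel_svm_dev entry for @dev on this SVM's device list, if any. */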
static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->dev == dev) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

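/*
 * Allocate the page request queue, set up its interrupt and the I/O page
 * fault queue, and program the queue address into the hardware registers.
 */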
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct iopf_queue *iopfq;
	struct page *pages;
	int irq, ret;

	pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
		goto free_prq;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
		 "dmar%d-iopfq", iommu->seq_id);
	iopfq = iopf_queue_alloc(iommu->iopfq_name);
	if (!iopfq) {
		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
		ret = -ENOMEM;
		goto free_hwirq;
	}
	iommu->iopf_queue = iopfq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		goto free_iopfq;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

	return 0;

free_iopfq:
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
free_hwirq:
	dmar_free_hwirq(irq);
	iommu->pr_irq = 0;
free_prq:
	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return ret;
}

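/* Disable the page request queue and release what intel_svm_enable_prq() set up. */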
int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	if (iommu->iopf_queue) {
		iopf_queue_free(iommu->iopf_queue);
		iommu->iopf_queue = NULL;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

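/*
 * Mark the IOMMU as SVM capable only if its first-level paging capabilities
 * match what the CPU is using (1GB pages, 5-level paging).
 */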
void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_fl5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

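/* Flush IOTLB and, if ATS is enabled, device TLB entries for one device. */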
static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

	if (WARN_ON(!pages))
		return;

	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
	if (info->ats_enabled) {
		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
					 svm->pasid, sdev->qdep, address,
					 order_base_2(pages));
		quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
					  svm->pasid, sdev->qdep);
	}
}

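/*
 * Flush an arbitrary range by rounding it up to a naturally aligned
 * power-of-two size and invalidating one such chunk at a time.
 */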
static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

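/* Flush the whole address space for every device bound to this SVM context. */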
static void intel_flush_svm_all(struct intel_svm *svm)
{
	struct device_domain_info *info;
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list) {
		info = dev_iommu_priv_get(sdev->dev);

		qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
		if (info->ats_enabled) {
			qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
						 svm->pasid, sdev->qdep,
						 0, 64 - VTD_PAGE_SHIFT);
			quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
						  svm->pasid, sdev->qdep);
		}
	}
	rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
						 struct mm_struct *mm,
						 unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	if (start == 0 && end == -1UL) {
		intel_flush_svm_all(svm);
		return;
	}

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our invalidate_range() callback doesn't
	 * get called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();
}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
};

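/*
 * Look up the SVM context and the per-device binding for @pasid on @dev.
 * Returns 0 with *rsvm and *rsdev set (either may be NULL if not found),
 * or a negative error code.
 */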
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm;

	if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = pasid_private_find(pasid);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found svm for the PASID, there must be at least one device
	 * bond.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;
	sdev = svm_lookup_device_by_dev(svm, dev);

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

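/*
 * Bind the mm of an SVA domain to @dev for @pasid: allocate or reuse the
 * intel_svm for this PASID (registering an MMU notifier on first use), set
 * up the first-level PASID table entry, and record the device on the SVM's
 * device list.
 */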
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
				   struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct mm_struct *mm = domain->mm;
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	unsigned long sflags;
	int ret = 0;

	svm = pasid_private_find(pasid);
	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return -ENOMEM;

		svm->pasid = pasid;
		svm->mm = mm;
		INIT_LIST_HEAD_RCU(&svm->devs);

		svm->notifier.ops = &intel_mmuops;
		ret = mmu_notifier_register(&svm->notifier, mm);
		if (ret) {
			kfree(svm);
			return ret;
		}

		ret = pasid_private_add(svm->pasid, svm);
		if (ret) {
			mmu_notifier_unregister(&svm->notifier, mm);
			kfree(svm);
			return ret;
		}
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto free_svm;
	}

	sdev->dev = dev;
	sdev->iommu = iommu;
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	if (info->ats_enabled) {
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Setup the pasid table: */
	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
					    FLPT_DEFAULT_DID, sflags);
	if (ret)
		goto free_sdev;

	list_add_rcu(&sdev->list, &svm->devs);

	return 0;

free_sdev:
	kfree(sdev);
free_svm:
	if (list_empty(&svm->devs)) {
		mmu_notifier_unregister(&svm->notifier, mm);
		pasid_private_remove(pasid);
		kfree(svm);
	}

	return ret;
}

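/*
 * Undo intel_svm_set_dev_pasid(): remove the device from the SVM's device
 * list and release the intel_svm once its last device binding is gone.
 */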
void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	struct mm_struct *mm;

	if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
		return;
	mm = svm->mm;

	if (sdev) {
		list_del_rcu(&sdev->list);
		kfree_rcu(sdev, rcu);

		if (list_empty(&svm->devs)) {
			if (svm->notifier.ops)
				mmu_notifier_unregister(&svm->notifier, mm);
			pasid_private_remove(svm->pasid);
			kfree(svm);
		}
	}
}

/* Page request queue descriptor */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

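/*
 * A canonical address has its upper bits equal to a sign extension of bit
 * __VIRTUAL_MASK_SHIFT; shifting up and back down reproduces the original
 * value only in that case.
 */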
static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_drain_pasid_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then follow the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
void intel_drain_pasid_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain_id_iommu(domain, iommu);
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	iopf_queue_flush_dev(dev);

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}

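/* Translate the access bits of a page request into IOMMU_FAULT_PERM_* flags. */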
static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

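/*
 * Convert a page request descriptor into an iommu fault event and report
 * it to the I/O page fault handling framework.
 */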
static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
				 struct page_req_dsc *desc)
{
	struct iopf_fault event = { };

	/* Fill in event data for device specific processing */
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * Set the last-page-in-group bit if private data is present:
		 * a page response is then required, just as it is for LPIG.
		 * iommu_report_device_fault() doesn't understand this vendor
		 * specific requirement, thus we set last_page as a workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		event.fault.prm.private_data[0] = desc->priv_data[0];
		event.fault.prm.private_data[1] = desc->priv_data[1];
	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
		/*
		 * If the private data fields are not used by hardware, use
		 * them to monitor the prq handling latency.
		 */
		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
	}

	iommu_report_device_fault(dev, &event);
}

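/*
 * Reject a malformed page request, sending a page group response with the
 * given result code when the descriptor requires one.
 */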
static void handle_bad_prq_event(struct intel_iommu *iommu,
				 struct page_req_dsc *req, int result)
{
	struct qi_desc desc;

	pr_err("%s: Invalid page request: %08llx %08llx\n",
	       iommu->name, ((unsigned long long *)req)[0],
	       ((unsigned long long *)req)[1]);

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must
	 * respond with page group response if private data
	 * is present (PDP) or last page in group (LPIG) bit
	 * is set. This is an additional VT-d feature beyond
	 * PCI ATS spec.
	 */
	if (!req->lpig && !req->priv_data_present)
		return;

	desc.qw0 = QI_PGRP_PASID(req->pasid) |
			QI_PGRP_DID(req->rid) |
			QI_PGRP_PASID_P(req->pasid_present) |
			QI_PGRP_PDP(req->priv_data_present) |
			QI_PGRP_RESP_CODE(result) |
			QI_PGRP_RESP_TYPE;
	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
			QI_PGRP_LPIG(req->lpig);

	if (req->priv_data_present) {
		desc.qw2 = req->priv_data[0];
		desc.qw3 = req->priv_data[1];
	} else {
		desc.qw2 = 0;
		desc.qw3 = 0;
	}

	qi_submit_sync(iommu, &desc, 1, 0);
}

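/*
 * Threaded handler for the page request interrupt: validate each descriptor
 * in the queue and forward the valid ones to the I/O page fault framework.
 */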
static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_iommu *iommu = d;
	struct page_req_dsc *req;
	int head, tail, handled;
	struct device *dev;
	u64 address;

	/*
	 * Clear PPR bit before reading head/tail registers, to ensure that
	 * we get a new interrupt if needed.
	 */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	handled = (head != tail);
	while (head != tail) {
		req = &iommu->prq[head / sizeof(*req)];
		address = (u64)req->addr << VTD_PAGE_SHIFT;

		if (unlikely(!req->pasid_present)) {
			pr_err("IOMMU: %s: Page request without PASID\n",
			       iommu->name);
bad_req:
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
			goto prq_advance;
		}

		if (unlikely(!is_canonical_address(address))) {
			pr_err("IOMMU: %s: Address is not canonical\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->exe_req && req->rd_req)) {
			pr_err("IOMMU: %s: Execution request not supported\n",
			       iommu->name);
			goto bad_req;
		}

		/* Drop Stop Marker message. No need for a response. */
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		/*
		 * If prq is to be handled outside iommu driver via receiver of
		 * the fault notifiers, we skip the page response here.
		 */
		mutex_lock(&iommu->iopf_lock);
		dev = device_rbtree_find(iommu, req->rid);
		if (!dev) {
			mutex_unlock(&iommu->iopf_lock);
			goto bad_req;
		}

		intel_svm_prq_report(iommu, dev, req);
		trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
				 req->priv_data[0], req->priv_data[1],
				 iommu->prq_seq_number++);
		mutex_unlock(&iommu->iopf_lock);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
				    iommu->name);
		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
		if (head == tail) {
			iopf_queue_discard_partial(iommu->iopf_queue);
			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
					    iommu->name);
		}
	}

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}

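/* Send a page group response for a previously reported page request. */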
void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
			     struct iommu_page_response *msg)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	u8 bus = info->bus, devfn = info->devfn;
	struct iommu_fault_page_request *prm;
	bool private_present;
	bool pasid_present;
	bool last_page;
	u16 sid;

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;

		if (private_present) {
			desc.qw2 = prm->private_data[0];
			desc.qw3 = prm->private_data[1];
		} else if (prm->private_data[0]) {
			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
				ktime_to_ns(ktime_get()) - prm->private_data[0]);
		}

		qi_submit_sync(iommu, &desc, 1, 0);
	}
}

static void intel_svm_domain_free(struct iommu_domain *domain)
{
	kfree(to_dmar_domain(domain));
}

static const struct iommu_domain_ops intel_svm_domain_ops = {
	.set_dev_pasid = intel_svm_set_dev_pasid,
	.free = intel_svm_domain_free
};

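/*
 * Allocate an SVA domain; its translations come from the CPU page table of
 * the mm bound to it via intel_svm_set_dev_pasid().
 */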
struct iommu_domain *intel_svm_domain_alloc(void)
{
	struct dmar_domain *domain;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;
	domain->domain.ops = &intel_svm_domain_ops;

	return &domain->domain;
}