// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_region.h"

#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"

#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

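/* Write a single qword into the object's backing store at the given GTT address */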
static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
        GEM_BUG_ON(addr < i915_vma_offset(vma));
        GEM_BUG_ON(addr > i915_vma_offset(vma) + i915_vma_size(vma) - sizeof(val));
        memset64(page_mask_bits(vma->obj->mm.mapping) +
                 (addr - i915_vma_offset(vma)), val, 1);
}

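/*
 * Bind va at a random address and spin a batch on MI_CONDITIONAL_BATCH_BUFFER_END
 * sampling that address, then rewrite the PTE to point at vb's backing store and
 * invalidate the TLB. The spinner only terminates once it reads the 0 written to
 * vb; if the stale translation to va is not revoked, the request never completes.
 */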
static int
pte_tlbinv(struct intel_context *ce,
           struct i915_vma *va,
           struct i915_vma *vb,
           u64 align,
           void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
           u64 length,
           struct rnd_state *prng)
{
        const unsigned int pat_index =
                i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
        struct drm_i915_gem_object *batch;
        struct drm_mm_node vb_node;
        struct i915_request *rq;
        struct i915_vma *vma;
        u64 addr;
        int err;
        u32 *cs;

        batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
        if (IS_ERR(batch))
                return PTR_ERR(batch);

        vma = i915_vma_instance(batch, ce->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto out;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto out;

        /* Pin va at a random but aligned offset after vma */
        addr = round_up(vma->node.start + vma->node.size, align);
        /* MI_CONDITIONAL_BATCH_BUFFER_END limits the address to 48b */
        addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
                                 va->size, align);
        err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
        if (err) {
                pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
                goto out;
        }
        GEM_BUG_ON(i915_vma_offset(va) != addr);
        if (vb != va) {
                vb_node = vb->node;
                vb->node = va->node; /* overwrites the _same_ PTE */
        }

        /*
         * Now choose a random dword within the 1st pinned page.
         *
         * SZ_64K pages on dg1 require that the whole PT be marked as
         * containing 64KiB entries. So we make sure that the vma covers
         * the whole PT, despite being randomly aligned to 64KiB, and
         * restrict our sampling to the 2MiB PT within which we know we
         * will be using 64KiB pages.
         */
        if (align == SZ_64K)
                addr = round_up(addr, SZ_2M);
        addr = igt_random_offset(prng, addr, addr + align, 8, 8);

        if (va != vb)
                pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
                        ce->engine->name, va->obj->mm.region->name ?: "smem",
                        addr, align, va->resource->page_sizes_gtt,
                        va->page_sizes.phys, va->page_sizes.sg,
                        addr & -length, length);

        cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto out_va;
        }

        *cs++ = MI_NOOP; /* for later termination */
        /*
         * Sample the target to see if we spot the updated backing store.
         * Gen8 VCS compares the immediate value with the bitwise-and of two
         * consecutive DWORDs pointed to by addr; other gens/engines compare
         * the value with the single DWORD pointed to by addr. Moreover we
         * want to exercise DWORD-sized invalidations. The values below were
         * chosen to satisfy all of these requirements.
         */
        *cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
        *cs++ = 0; /* break if *addr == 0 */
        *cs++ = lower_32_bits(addr);
        *cs++ = upper_32_bits(addr);
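        /* va reads back -1 (keep spinning); vb reads back 0 (terminate once visible) */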
        vma_set_qw(va, addr, -1);
        vma_set_qw(vb, addr, 0);

        /* Keep sampling until we get bored */
        *cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
        *cs++ = lower_32_bits(i915_vma_offset(vma));
        *cs++ = upper_32_bits(i915_vma_offset(vma));

        i915_gem_object_flush_map(batch);

        rq = i915_request_create(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_va;
        }

        err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
        if (err) {
                i915_request_add(rq);
                goto out_va;
        }

        i915_request_get(rq);
        i915_request_add(rq);

        /* Short sleep to sanitycheck the batch is spinning before we begin */
        msleep(10);
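        /* With va == vb the sampled qword is already 0, so the spinner must have ended */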
        if (va == vb) {
                if (!i915_request_completed(rq)) {
                        pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
                               ce->engine->name, va->obj->mm.region->name ?: "smem",
                               addr, align, va->resource->page_sizes_gtt,
                               va->page_sizes.phys, va->page_sizes.sg);
                        err = -EIO;
                }
        } else if (!i915_request_completed(rq)) {
                struct i915_vma_resource vb_res = {
                        .bi.pages = vb->obj->mm.pages,
                        .bi.page_sizes = vb->obj->mm.page_sizes,
                        .start = i915_vma_offset(vb),
                        .vma_size = i915_vma_size(vb)
                };
                unsigned int pte_flags = 0;

                /* Flip the PTE between A and B */
                if (i915_gem_object_is_lmem(vb->obj))
                        pte_flags |= PTE_LM;
                ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);

                /* Flush the PTE update to concurrent HW */
                tlbinv(ce->vm, addr & -length, length);

                if (wait_for(i915_request_completed(rq), HZ / 2)) {
                        pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
                               ce->engine->name);
                        err = -EINVAL;
                }
        } else {
                pr_err("Spinner ended unexpectedly\n");
                err = -EIO;
        }
        i915_request_put(rq);

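        /* Overwrite the leading MI_NOOP with MI_BATCH_BUFFER_END to stop the spinner */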
        cs = page_mask_bits(batch->mm.mapping);
        *cs = MI_BATCH_BUFFER_END;
        wmb();

out_va:
        if (vb != va)
                vb->node = vb_node;
        i915_vma_unpin(va);
        if (i915_vma_unbind_unlocked(va))
                err = -EIO;
out:
        i915_gem_object_put(batch);
        return err;
}

static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
        struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
        resource_size_t size = SZ_1G;

        /*
         * Allocating the largest possible page size allows us to test all
         * page types. To succeed with both allocations, especially in the
         * case of Small BAR, try to allocate no more than a quarter of the
         * mappable memory.
         */
        if (mr && size > mr->io_size / 4)
                size = mr->io_size / 4;

        return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}

static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
{
        /*
         * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
         * While that does not require the whole 2M block to be contiguous,
         * it is easier to make it so, since we need contiguity for SZ_2M
         * pages anyway. Since we randomly offset the start of the vma, we
         * need a 4M object so that there is a 2M range within it that is
         * suitable for SZ_64K PTEs.
         */
        return i915_gem_object_create_internal(gt->i915, SZ_4M);
}

static int
mem_tlbinv(struct intel_gt *gt,
           struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
           void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
{
        unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
        struct intel_engine_cs *engine;
        struct drm_i915_gem_object *A, *B;
        struct i915_ppgtt *ppgtt;
        struct i915_vma *va, *vb;
        enum intel_engine_id id;
        I915_RND_STATE(prng);
        void *vaddr;
        int err;

        /*
         * Check that the TLB invalidate is able to revoke an active
         * page. We load a page into a spinning COND_BBE loop and then
         * remap that page to a new physical address. The old address is
         * retained in the TLB cache, and so the loop keeps spinning, until
         * we issue an invalidate.
         */

        A = create_fn(gt);
        if (IS_ERR(A))
                return PTR_ERR(A);

        vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
        if (IS_ERR(vaddr)) {
                err = PTR_ERR(vaddr);
                goto out_a;
        }

        B = create_fn(gt);
        if (IS_ERR(B)) {
                err = PTR_ERR(B);
                goto out_a;
        }

        vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
        if (IS_ERR(vaddr)) {
                err = PTR_ERR(vaddr);
                goto out_b;
        }

        GEM_BUG_ON(A->base.size != B->base.size);
        if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
                pr_warn("Failed to allocate contiguous pages for size %zx\n",
                        A->base.size);

        ppgtt = i915_ppgtt_create(gt, 0);
        if (IS_ERR(ppgtt)) {
                err = PTR_ERR(ppgtt);
                goto out_b;
        }

        va = i915_vma_instance(A, &ppgtt->vm, NULL);
        if (IS_ERR(va)) {
                err = PTR_ERR(va);
                goto out_vm;
        }

        vb = i915_vma_instance(B, &ppgtt->vm, NULL);
        if (IS_ERR(vb)) {
                err = PTR_ERR(vb);
                goto out_vm;
        }

        err = 0;
        for_each_engine(engine, gt, id) {
                struct i915_gem_ww_ctx ww;
                struct intel_context *ce;
                int bit;

                ce = intel_context_create(engine);
                if (IS_ERR(ce)) {
                        err = PTR_ERR(ce);
                        break;
                }

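                /* Swap in our private ppgtt so the context uses va/vb's address space */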
                i915_vm_put(ce->vm);
                ce->vm = i915_vm_get(&ppgtt->vm);

                for_i915_gem_ww(&ww, err, true)
                        err = intel_context_pin_ww(ce, &ww);
                if (err)
                        goto err_put;

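                /* Walk each supported page size that meets the object's minimum alignment */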
                for_each_set_bit(bit,
                                 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
                                 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
                        unsigned int len;

                        if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
                                continue;

                        /* sanitycheck the semaphore wake up */
                        err = pte_tlbinv(ce, va, va,
                                         BIT_ULL(bit),
                                         NULL, SZ_4K,
                                         &prng);
                        if (err)
                                goto err_unpin;

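                        /* Invalidation lengths from 4 bytes up to the entire ppgtt */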
                        for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
                                err = pte_tlbinv(ce, va, vb,
                                                 BIT_ULL(bit),
                                                 tlbinv,
                                                 BIT_ULL(len),
                                                 &prng);
                                if (err)
                                        goto err_unpin;
                                if (len == ppgtt_size)
                                        break;
                        }
                }
err_unpin:
                intel_context_unpin(ce);
err_put:
                intel_context_put(ce);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

out_vm:
        i915_vm_put(&ppgtt->vm);
out_b:
        i915_gem_object_put(B);
out_a:
        i915_gem_object_put(A);
        return err;
}

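/* Ignore the requested range and invalidate the entire TLB for the GT */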
static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
        intel_gt_invalidate_tlb(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}

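/* Exercise full-GT TLB invalidation against both system and local memory */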
static int invalidate_full(void *arg)
{
        struct intel_gt *gt = arg;
        int err;

        if (GRAPHICS_VER(gt->i915) < 8)
                return 0; /* TLB invalidate not implemented */

        err = mem_tlbinv(gt, create_smem, tlbinv_full);
        if (err == 0)
                err = mem_tlbinv(gt, create_lmem, tlbinv_full);
        if (err == -ENODEV || err == -ENXIO)
                err = 0;

        return err;
}

int intel_tlb_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(invalidate_full),
        };
        struct intel_gt *gt;
        unsigned int i;

        for_each_gt(gt, i915, i) {
                int err;

                if (intel_gt_is_wedged(gt))
                        continue;

                err = intel_gt_live_subtests(tests, gt);
                if (err)
                        return err;
        }

        return 0;
}