/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

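/* A single test pairing: the object under test plus the engine used for GPU access. */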
struct context {
        struct drm_i915_gem_object *obj;
        struct intel_engine_cs *engine;
};

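/*
 * CPU-domain accessors: map the backing page with kmap_local_page() and
 * access the dword directly, clflushing before and/or after as the
 * needs_clflush hint from prepare_read()/prepare_write() dictates.
 */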
static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
        unsigned int needs_clflush;
        struct page *page;
        u32 *cpu;
        int err;

        i915_gem_object_lock(ctx->obj, NULL);
        err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
        if (err)
                goto out;

        page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
        cpu = kmap_local_page(page) + offset_in_page(offset);

        if (needs_clflush & CLFLUSH_BEFORE)
                drm_clflush_virt_range(cpu, sizeof(*cpu));

        *cpu = v;

        if (needs_clflush & CLFLUSH_AFTER)
                drm_clflush_virt_range(cpu, sizeof(*cpu));

        kunmap_local(cpu);
        i915_gem_object_finish_access(ctx->obj);

out:
        i915_gem_object_unlock(ctx->obj);
        return err;
}

static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
        unsigned int needs_clflush;
        struct page *page;
        u32 *cpu;
        int err;

        i915_gem_object_lock(ctx->obj, NULL);
        err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
        if (err)
                goto out;

        page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
        cpu = kmap_local_page(page) + offset_in_page(offset);

        if (needs_clflush & CLFLUSH_BEFORE)
                drm_clflush_virt_range(cpu, sizeof(*cpu));

        *v = *cpu;

        kunmap_local(cpu);
        i915_gem_object_finish_access(ctx->obj);

out:
        i915_gem_object_unlock(ctx->obj);
        return err;
}

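/*
 * GTT-domain accessors: pin the object into the mappable aperture and
 * access the dword through an ioremapped view, holding a GT wakeref
 * around the MMIO access.
 */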
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
        intel_wakeref_t wakeref;
        struct i915_vma *vma;
        u32 __iomem *map;
        int err = 0;

        i915_gem_object_lock(ctx->obj, NULL);
        err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
        i915_gem_object_unlock(ctx->obj);
        if (err)
                return err;

        vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        wakeref = intel_gt_pm_get(vma->vm->gt);

        map = i915_vma_pin_iomap(vma);
        i915_vma_unpin(vma);
        if (IS_ERR(map)) {
                err = PTR_ERR(map);
                goto out_rpm;
        }

        iowrite32(v, &map[offset / sizeof(*map)]);
        i915_vma_unpin_iomap(vma);

out_rpm:
        intel_gt_pm_put(vma->vm->gt, wakeref);
        return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
        intel_wakeref_t wakeref;
        struct i915_vma *vma;
        u32 __iomem *map;
        int err = 0;

        i915_gem_object_lock(ctx->obj, NULL);
        err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
        i915_gem_object_unlock(ctx->obj);
        if (err)
                return err;

        vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        wakeref = intel_gt_pm_get(vma->vm->gt);

        map = i915_vma_pin_iomap(vma);
        i915_vma_unpin(vma);
        if (IS_ERR(map)) {
                err = PTR_ERR(map);
                goto out_rpm;
        }

        *v = ioread32(&map[offset / sizeof(*map)]);
        i915_vma_unpin_iomap(vma);

out_rpm:
        intel_gt_pm_put(vma->vm->gt, wakeref);
        return err;
}

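/*
 * WC-domain accessors: access the dword through a write-combined CPU
 * mapping of the object's pages, flushing the map after the write.
 */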
static int wc_set(struct context *ctx, unsigned long offset, u32 v)
{
        u32 *map;
        int err;

        i915_gem_object_lock(ctx->obj, NULL);
        err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
        i915_gem_object_unlock(ctx->obj);
        if (err)
                return err;

        map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC);
        if (IS_ERR(map))
                return PTR_ERR(map);

        map[offset / sizeof(*map)] = v;

        __i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
        i915_gem_object_unpin_map(ctx->obj);

        return 0;
}

static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
{
        u32 *map;
        int err;

        i915_gem_object_lock(ctx->obj, NULL);
        err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
        i915_gem_object_unlock(ctx->obj);
        if (err)
                return err;

        map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC);
        if (IS_ERR(map))
                return PTR_ERR(map);

        *v = map[offset / sizeof(*map)];
        i915_gem_object_unpin_map(ctx->obj);

        return 0;
}

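/*
 * GPU writer: emit MI_STORE_DWORD_IMM from a kernel request on the
 * chosen engine so the GPU itself writes the value via the object's
 * GGTT address. There is no GPU read-back, so the "gpu" mode below
 * only provides a setter.
 */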
static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
{
        struct i915_request *rq;
        struct i915_vma *vma;
        u32 *cs;
        int err;

        vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        i915_gem_object_lock(ctx->obj, NULL);
        err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
        if (err)
                goto out_unlock;

        rq = intel_engine_create_kernel_request(ctx->engine);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_unpin;
        }

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto out_rq;
        }

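        /*
         * The store-dword encoding varies by generation: gen8+ takes a
         * 64-bit GGTT address, gen4-gen7 take a 32-bit address preceded
         * by a zero dword, and older parts use the legacy virtual-address
         * form padded with MI_NOOP to fill the four reserved dwords.
         */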
        if (GRAPHICS_VER(ctx->engine->i915) >= 8) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
                *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
                *cs++ = v;
        } else if (GRAPHICS_VER(ctx->engine->i915) >= 4) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = 0;
                *cs++ = i915_ggtt_offset(vma) + offset;
                *cs++ = v;
        } else {
                *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
                *cs++ = i915_ggtt_offset(vma) + offset;
                *cs++ = v;
                *cs++ = MI_NOOP;
        }
        intel_ring_advance(rq, cs);

        err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);

out_rq:
        i915_request_add(rq);
out_unpin:
        i915_vma_unpin(vma);
out_unlock:
        i915_gem_object_unlock(ctx->obj);

        return err;
}

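/*
 * Mode predicates: each access path advertises whether it can work on
 * this device. GTT access is gated on the device having fence registers,
 * and GPU writes on an engine able to use MI_STORE_DWORD_IMM; both are
 * skipped once the GT is wedged.
 */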
static bool always_valid(struct context *ctx)
{
        return true;
}

static bool needs_fence_registers(struct context *ctx)
{
        struct intel_gt *gt = ctx->engine->gt;

        if (intel_gt_is_wedged(gt))
                return false;

        return gt->ggtt->num_fences;
}

static bool needs_mi_store_dword(struct context *ctx)
{
        if (intel_gt_is_wedged(ctx->engine->gt))
                return false;

        return intel_engine_can_store_dword(ctx->engine);
}

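/*
 * The table of access modes exercised below. Note "gpu" has no getter,
 * so it participates only as a writer/overwriter.
 */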
static const struct igt_coherency_mode {
        const char *name;
        int (*set)(struct context *ctx, unsigned long offset, u32 v);
        int (*get)(struct context *ctx, unsigned long offset, u32 *v);
        bool (*valid)(struct context *ctx);
} igt_coherency_mode[] = {
        { "cpu", cpu_set, cpu_get, always_valid },
        { "gtt", gtt_set, gtt_get, needs_fence_registers },
        { "wc", wc_set, wc_get, always_valid },
        { "gpu", gpu_set, NULL, needs_mi_store_dword },
        { },
};

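/* Pick a uabi engine uniformly at random to use for the GPU paths. */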
static struct intel_engine_cs *
random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
{
        struct intel_engine_cs *engine;
        unsigned int count;

        count = 0;
        for_each_uabi_engine(engine, i915)
                count++;

        count = i915_prandom_u32_max_state(count, prng);
        for_each_uabi_engine(engine, i915)
                if (count-- == 0)
                        return engine;

        return NULL;
}

static int igt_gem_coherency(void *arg)
{
        const unsigned int ncachelines = PAGE_SIZE / 64;
        struct drm_i915_private *i915 = arg;
        const struct igt_coherency_mode *read, *write, *over;
        unsigned long count, n;
        u32 *offsets, *values;
        I915_RND_STATE(prng);
        struct context ctx;
        int err = 0;

        /*
         * We repeatedly write, overwrite and read from a sequence of
         * cachelines in order to try to detect incoherency (unflushed writes
         * from either the CPU or GPU). Each setter/getter uses our cache
         * domain API which should prevent incoherency.
         */

        offsets = kmalloc_array(ncachelines, 2 * sizeof(u32), GFP_KERNEL);
        if (!offsets)
                return -ENOMEM;
        for (count = 0; count < ncachelines; count++)
                offsets[count] = count * 64 + 4 * (count % 16);

        values = offsets + ncachelines;

        ctx.engine = random_engine(i915, &prng);
        if (!ctx.engine) {
                err = -ENODEV;
                goto out_free;
        }
        pr_info("%s: using %s\n", __func__, ctx.engine->name);
        intel_engine_pm_get(ctx.engine);

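        /*
         * Nested sweep over all valid (over, write, read) combinations:
         * "over" first seeds each selected cacheline with the bitwise
         * inverse of the expected value, "write" then stores the real
         * value, and "read" verifies it, so any unflushed write in one
         * path surfaces as the stale inverse leaking back out.
         */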
        for (over = igt_coherency_mode; over->name; over++) {
                if (!over->set)
                        continue;

                if (!over->valid(&ctx))
                        continue;

                for (write = igt_coherency_mode; write->name; write++) {
                        if (!write->set)
                                continue;

                        if (!write->valid(&ctx))
                                continue;

                        for (read = igt_coherency_mode; read->name; read++) {
                                if (!read->get)
                                        continue;

                                if (!read->valid(&ctx))
                                        continue;

                                for_each_prime_number_from(count, 1, ncachelines) {
                                        ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
                                        if (IS_ERR(ctx.obj)) {
                                                err = PTR_ERR(ctx.obj);
                                                goto out_pm;
                                        }

                                        i915_random_reorder(offsets, ncachelines, &prng);
                                        for (n = 0; n < count; n++)
                                                values[n] = prandom_u32_state(&prng);

                                        for (n = 0; n < count; n++) {
                                                err = over->set(&ctx, offsets[n], ~values[n]);
                                                if (err) {
                                                        pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
                                                               n, count, over->name, err);
                                                        goto put_object;
                                                }
                                        }

                                        for (n = 0; n < count; n++) {
                                                err = write->set(&ctx, offsets[n], values[n]);
                                                if (err) {
                                                        pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
                                                               n, count, write->name, err);
                                                        goto put_object;
                                                }
                                        }

                                        for (n = 0; n < count; n++) {
                                                u32 found;

                                                err = read->get(&ctx, offsets[n], &found);
                                                if (err) {
                                                        pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
                                                               n, count, read->name, err);
                                                        goto put_object;
                                                }

                                                if (found != values[n]) {
                                                        pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
                                                               n, count, over->name,
                                                               write->name, values[n],
                                                               read->name, found,
                                                               ~values[n], offsets[n]);
                                                        err = -EINVAL;
                                                        goto put_object;
                                                }
                                        }

                                        i915_gem_object_put(ctx.obj);
                                }
                        }
                }
        }
out_pm:
        intel_engine_pm_put(ctx.engine);
out_free:
        kfree(offsets);
        return err;

put_object:
        i915_gem_object_put(ctx.obj);
        goto out_pm;
}

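/* Live selftest entry point, invoked via the i915 selftest machinery. */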
int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_gem_coherency),
        };

        return i915_live_subtests(tests, i915);
}