/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

struct context {
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
};

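/*
 * Write a single dword via a CPU (kmap) mapping, using the cache domain
 * API to apply any clflushes the object's coherency state requires.
 */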
static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	u32 *cpu;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
	if (err)
		goto out;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	cpu = kmap_local_page(page) + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_local(cpu);
	i915_gem_object_finish_access(ctx->obj);

out:
	i915_gem_object_unlock(ctx->obj);
	return err;
}

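/*
 * Read a single dword back via a CPU mapping; only a flush before the
 * read is needed since we do not dirty the cacheline.
 */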
static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	u32 *cpu;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
	if (err)
		goto out;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	cpu = kmap_local_page(page) + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_local(cpu);
	i915_gem_object_finish_access(ctx->obj);

out:
	i915_gem_object_unlock(ctx->obj);
	return err;
}

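/*
 * Access a single dword through a mappable GGTT binding, i.e. via the
 * aperture with an io mapping, holding a GT wakeref for the access.
 */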
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	wakeref = intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt, wakeref);
	return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	wakeref = intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt, wakeref);
	return err;
}

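/*
 * Access a single dword through a write-combining map of the whole
 * object, flushing the WC buffers after writing so the store reaches
 * memory.
 */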
static int wc_set(struct context *ctx, unsigned long offset, u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;

	__i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

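/*
 * Write a single dword from the GPU itself by emitting an
 * MI_STORE_DWORD_IMM in a kernel request; the command encoding varies
 * with the graphics version.
 */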
static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	if (err)
		goto out_unpin;

	rq = intel_engine_create_kernel_request(ctx->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_rq;
	}

	if (GRAPHICS_VER(ctx->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (GRAPHICS_VER(ctx->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);

out_rq:
	i915_request_add(rq);
out_unpin:
	i915_vma_unpin(vma);
	i915_gem_object_unlock(ctx->obj);

	return err;
}

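/*
 * Predicates deciding whether a coherency mode can be exercised on the
 * current device.
 */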
static bool always_valid(struct context *ctx)
{
	return true;
}

static bool needs_fence_registers(struct context *ctx)
{
	struct intel_gt *gt = ctx->engine->gt;

	if (intel_gt_is_wedged(gt))
		return false;

	return gt->ggtt->num_fences;
}

static bool needs_mi_store_dword(struct context *ctx)
{
	if (intel_gt_is_wedged(ctx->engine->gt))
		return false;

	return intel_engine_can_store_dword(ctx->engine);
}

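/*
 * The coherency modes under test: each supplies a setter, an optional
 * getter and a validity check. The array is terminated by an empty
 * entry.
 */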
static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct context *ctx, unsigned long offset, u32 v);
	int (*get)(struct context *ctx, unsigned long offset, u32 *v);
	bool (*valid)(struct context *ctx);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

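/*
 * Pick an engine at random from the uabi engine list: count the
 * engines, then walk the list again up to a randomly chosen index.
 */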
static struct intel_engine_cs *
random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	count = i915_prandom_u32_max_state(count, prng);
	for_each_uabi_engine(engine, i915)
		if (count-- == 0)
			return engine;

	return NULL;
}

static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE/64;
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	unsigned long count, n;
	u32 *offsets, *values;
	I915_RND_STATE(prng);
	struct context ctx;
	int err = 0;

	/*
	 * We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;

	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	ctx.engine = random_engine(i915, &prng);
	if (!ctx.engine) {
		err = -ENODEV;
		goto out_free;
	}
	pr_info("%s: using %s\n", __func__, ctx.engine->name);
	intel_engine_pm_get(ctx.engine);

	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(&ctx))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(&ctx))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(&ctx))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(ctx.obj)) {
						err = PTR_ERR(ctx.obj);
						goto out_pm;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(&ctx, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(&ctx, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(&ctx, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(ctx.obj);
				}
			}
		}
	}
out_pm:
	intel_engine_pm_put(ctx.engine);
out_free:
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(ctx.obj);
	goto out_pm;
}

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_live_subtests(tests, i915);
}