1 // SPDX-License-Identifier: MIT
3 * Copyright © 2018 Intel Corporation
6 #include <linux/prime_numbers.h>
8 #include "gem/i915_gem_internal.h"
10 #include "i915_selftest.h"
11 #include "intel_engine_heartbeat.h"
12 #include "intel_engine_pm.h"
13 #include "intel_reset.h"
14 #include "intel_ring.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftests/i915_random.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_live_test.h"
19 #include "selftests/igt_spinner.h"
20 #include "selftests/lib_sw_fence.h"
21 #include "shmem_utils.h"
23 #include "gem/selftests/igt_gem_utils.h"
24 #include "gem/selftests/mock_context.h"
26 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
28 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
30 #define LRI_HEADER MI_INSTR(0x22, 0)
31 #define LRI_LENGTH_MASK GENMASK(7, 0)
33 static struct i915_vma *create_scratch(struct intel_gt *gt)
35 return __vm_create_scratch_for_read_pinned(>->ggtt->vm, PAGE_SIZE);
38 static bool is_active(struct i915_request *rq)
40 if (i915_request_is_active(rq))
43 if (i915_request_on_hold(rq))
46 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
52 static int wait_for_submit(struct intel_engine_cs *engine,
53 struct i915_request *rq,
54 unsigned long timeout)
56 /* Ignore our own attempts to suppress excess tasklets */
57 tasklet_hi_schedule(&engine->sched_engine->tasklet);
61 bool done = time_after(jiffies, timeout);
63 if (i915_request_completed(rq)) /* that was quick! */
66 /* Wait until the HW has acknowledged the submission (or err) */
67 intel_engine_flush_submission(engine);
68 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
78 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
81 i915_ggtt_offset(ce->engine->status_page.vma) +
83 struct i915_request *rq;
86 rq = intel_context_create_request(ce);
90 cs = intel_ring_begin(rq, 4);
96 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
101 intel_ring_advance(rq, cs);
103 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
104 i915_request_add(rq);
108 static int context_flush(struct intel_context *ce, long timeout)
110 struct i915_request *rq;
111 struct dma_fence *fence;
114 rq = intel_engine_create_kernel_request(ce->engine);
118 fence = i915_active_fence_get(&ce->timeline->last_request);
120 i915_request_await_dma_fence(rq, fence);
121 dma_fence_put(fence);
124 rq = i915_request_get(rq);
125 i915_request_add(rq);
126 if (i915_request_wait(rq, 0, timeout) < 0)
128 i915_request_put(rq);
130 rmb(); /* We know the request is written, make sure all state is too! */
134 static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
136 if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
139 if (GRAPHICS_VER(engine->i915) < 12)
142 switch (engine->class) {
147 case COPY_ENGINE_CLASS:
149 case VIDEO_DECODE_CLASS:
150 case VIDEO_ENHANCEMENT_CLASS:
155 static int live_lrc_layout(void *arg)
157 struct intel_gt *gt = arg;
158 struct intel_engine_cs *engine;
159 enum intel_engine_id id;
164 * Check the register offsets we use to create the initial reg state
165 * match the layout saved by HW.
168 lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
171 GEM_BUG_ON(offset_in_page(lrc));
174 for_each_engine(engine, gt, id) {
178 if (!engine->default_state)
181 hw = shmem_pin_map(engine->default_state);
186 hw += LRC_STATE_OFFSET / sizeof(*hw);
188 __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
189 engine->kernel_context, engine, true);
193 u32 lri = READ_ONCE(hw[dw]);
202 pr_debug("%s: skipped instruction %x at dword %d\n",
203 engine->name, lri, dw);
208 if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
209 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
210 engine->name, dw, lri);
215 if (lrc[dw] != lri) {
216 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
217 engine->name, dw, lri, lrc[dw]);
223 * When bit 19 of MI_LOAD_REGISTER_IMM instruction
224 * opcode is set on Gen12+ devices, HW does not
225 * care about certain register address offsets, and
226 * instead checks the following for valid address
227 * ranges on specific engines:
228 * RCS && CCS: BITS(0 - 10)
230 * VECS && VCS: BITS(0 - 13)
232 lri_mask = get_lri_mask(engine, lri);
239 u32 offset = READ_ONCE(hw[dw]);
241 if ((offset ^ lrc[dw]) & lri_mask) {
242 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
243 engine->name, dw, offset, lrc[dw]);
249 * Skip over the actual register value as we
250 * expect that to differ.
255 } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
258 pr_info("%s: HW register image:\n", engine->name);
259 igt_hexdump(hw, PAGE_SIZE);
261 pr_info("%s: SW register image:\n", engine->name);
262 igt_hexdump(lrc, PAGE_SIZE);
265 shmem_unpin_map(engine->default_state, hw);
270 free_page((unsigned long)lrc);
274 static int find_offset(const u32 *lri, u32 offset)
278 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
279 if (lri[i] == offset)
285 static int live_lrc_fixed(void *arg)
287 struct intel_gt *gt = arg;
288 struct intel_engine_cs *engine;
289 enum intel_engine_id id;
293 * Check the assumed register offsets match the actual locations in
297 for_each_engine(engine, gt, id) {
304 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
309 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
314 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
319 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
324 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
325 lrc_ring_mi_mode(engine),
329 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
334 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
335 lrc_ring_wa_bb_per_ctx(engine),
336 "RING_BB_PER_CTX_PTR"
339 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
340 lrc_ring_indirect_ptr(engine),
341 "RING_INDIRECT_CTX_PTR"
344 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
345 lrc_ring_indirect_offset(engine),
346 "RING_INDIRECT_CTX_OFFSET"
349 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
354 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
355 lrc_ring_gpr0(engine),
359 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
360 lrc_ring_cmd_buf_cctl(engine),
364 i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
365 lrc_ring_bb_offset(engine),
372 if (!engine->default_state)
375 hw = shmem_pin_map(engine->default_state);
380 hw += LRC_STATE_OFFSET / sizeof(*hw);
382 for (t = tbl; t->name; t++) {
383 int dw = find_offset(hw, t->reg);
385 if (dw != t->offset) {
386 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
396 shmem_unpin_map(engine->default_state, hw);
402 static int __live_lrc_state(struct intel_engine_cs *engine,
403 struct i915_vma *scratch)
405 struct intel_context *ce;
406 struct i915_request *rq;
407 struct i915_gem_ww_ctx ww;
413 u32 expected[MAX_IDX];
418 ce = intel_context_create(engine);
422 i915_gem_ww_ctx_init(&ww, false);
424 err = i915_gem_object_lock(scratch->obj, &ww);
426 err = intel_context_pin_ww(ce, &ww);
430 rq = i915_request_create(ce);
436 cs = intel_ring_begin(rq, 4 * MAX_IDX);
439 i915_request_add(rq);
443 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
444 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
445 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
448 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
450 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
451 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
452 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
455 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
457 i915_request_get(rq);
458 i915_request_add(rq);
462 intel_engine_flush_submission(engine);
463 expected[RING_TAIL_IDX] = ce->ring->tail;
465 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
470 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
476 for (n = 0; n < MAX_IDX; n++) {
477 if (cs[n] != expected[n]) {
478 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
479 engine->name, n, cs[n], expected[n]);
485 i915_gem_object_unpin_map(scratch->obj);
488 i915_request_put(rq);
490 intel_context_unpin(ce);
492 if (err == -EDEADLK) {
493 err = i915_gem_ww_ctx_backoff(&ww);
497 i915_gem_ww_ctx_fini(&ww);
498 intel_context_put(ce);
502 static int live_lrc_state(void *arg)
504 struct intel_gt *gt = arg;
505 struct intel_engine_cs *engine;
506 struct i915_vma *scratch;
507 enum intel_engine_id id;
511 * Check the live register state matches what we expect for this
515 scratch = create_scratch(gt);
517 return PTR_ERR(scratch);
519 for_each_engine(engine, gt, id) {
520 err = __live_lrc_state(engine, scratch);
525 if (igt_flush_test(gt->i915))
528 i915_vma_unpin_and_release(&scratch, 0);
532 static int gpr_make_dirty(struct intel_context *ce)
534 struct i915_request *rq;
538 rq = intel_context_create_request(ce);
542 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
544 i915_request_add(rq);
548 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
549 for (n = 0; n < NUM_GPR_DW; n++) {
550 *cs++ = CS_GPR(ce->engine, n);
555 intel_ring_advance(rq, cs);
557 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
558 i915_request_add(rq);
563 static struct i915_request *
564 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
567 i915_ggtt_offset(ce->engine->status_page.vma) +
568 offset_in_page(slot);
569 struct i915_request *rq;
574 rq = intel_context_create_request(ce);
578 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
580 i915_request_add(rq);
584 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
587 *cs++ = MI_SEMAPHORE_WAIT |
588 MI_SEMAPHORE_GLOBAL_GTT |
590 MI_SEMAPHORE_SAD_NEQ_SDD;
595 for (n = 0; n < NUM_GPR_DW; n++) {
596 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
597 *cs++ = CS_GPR(ce->engine, n);
598 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
602 err = igt_vma_move_to_active_unlocked(scratch, rq, EXEC_OBJECT_WRITE);
604 i915_request_get(rq);
605 i915_request_add(rq);
607 i915_request_put(rq);
614 static int __live_lrc_gpr(struct intel_engine_cs *engine,
615 struct i915_vma *scratch,
618 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
619 struct intel_context *ce;
620 struct i915_request *rq;
625 if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
626 return 0; /* GPR only on rcs0 for gen8 */
628 err = gpr_make_dirty(engine->kernel_context);
632 ce = intel_context_create(engine);
636 rq = __gpr_read(ce, scratch, slot);
642 err = wait_for_submit(engine, rq, HZ / 2);
647 err = gpr_make_dirty(engine->kernel_context);
651 err = emit_semaphore_signal(engine->kernel_context, slot);
655 err = wait_for_submit(engine, rq, HZ / 2);
663 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
668 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
674 for (n = 0; n < NUM_GPR_DW; n++) {
676 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
678 n / 2, n & 1 ? "udw" : "ldw",
685 i915_gem_object_unpin_map(scratch->obj);
688 memset32(&slot[0], -1, 4);
690 i915_request_put(rq);
692 intel_context_put(ce);
696 static int live_lrc_gpr(void *arg)
698 struct intel_gt *gt = arg;
699 struct intel_engine_cs *engine;
700 struct i915_vma *scratch;
701 enum intel_engine_id id;
705 * Check that GPR registers are cleared in new contexts as we need
706 * to avoid leaking any information from previous contexts.
709 scratch = create_scratch(gt);
711 return PTR_ERR(scratch);
713 for_each_engine(engine, gt, id) {
714 st_engine_heartbeat_disable(engine);
716 err = __live_lrc_gpr(engine, scratch, false);
720 err = __live_lrc_gpr(engine, scratch, true);
725 st_engine_heartbeat_enable(engine);
726 if (igt_flush_test(gt->i915))
732 i915_vma_unpin_and_release(&scratch, 0);
736 static struct i915_request *
737 create_timestamp(struct intel_context *ce, void *slot, int idx)
740 i915_ggtt_offset(ce->engine->status_page.vma) +
741 offset_in_page(slot);
742 struct i915_request *rq;
746 rq = intel_context_create_request(ce);
750 cs = intel_ring_begin(rq, 10);
756 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
759 *cs++ = MI_SEMAPHORE_WAIT |
760 MI_SEMAPHORE_GLOBAL_GTT |
762 MI_SEMAPHORE_SAD_NEQ_SDD;
767 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
768 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
769 *cs++ = offset + idx * sizeof(u32);
772 intel_ring_advance(rq, cs);
776 i915_request_get(rq);
777 i915_request_add(rq);
779 i915_request_put(rq);
786 struct lrc_timestamp {
787 struct intel_engine_cs *engine;
788 struct intel_context *ce[2];
792 static bool timestamp_advanced(u32 start, u32 end)
794 return (s32)(end - start) > 0;
797 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
799 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
800 struct i915_request *rq;
804 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
805 rq = create_timestamp(arg->ce[0], slot, 1);
809 err = wait_for_submit(rq->engine, rq, HZ / 2);
814 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
815 err = emit_semaphore_signal(arg->ce[1], slot);
823 /* And wait for switch to kernel (to save our context to memory) */
824 err = context_flush(arg->ce[0], HZ / 2);
828 if (!timestamp_advanced(arg->poison, slot[1])) {
829 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
830 arg->engine->name, preempt ? "preempt" : "simple",
831 arg->poison, slot[1]);
835 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
836 if (!timestamp_advanced(slot[1], timestamp)) {
837 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
838 arg->engine->name, preempt ? "preempt" : "simple",
844 memset32(slot, -1, 4);
845 i915_request_put(rq);
849 static int live_lrc_timestamp(void *arg)
851 struct lrc_timestamp data = {};
852 struct intel_gt *gt = arg;
853 enum intel_engine_id id;
854 const u32 poison[] = {
862 * We want to verify that the timestamp is saved and restored across
863 * context switches and is monotonic.
865 * So we do this with a little bit of LRC poisoning to check various
866 * boundary conditions, and see what happens if we preempt the context
867 * with a second request (carrying more poison into the timestamp).
870 for_each_engine(data.engine, gt, id) {
873 st_engine_heartbeat_disable(data.engine);
875 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
876 struct intel_context *tmp;
878 tmp = intel_context_create(data.engine);
884 err = intel_context_pin(tmp);
886 intel_context_put(tmp);
893 for (i = 0; i < ARRAY_SIZE(poison); i++) {
894 data.poison = poison[i];
896 err = __lrc_timestamp(&data, false);
900 err = __lrc_timestamp(&data, true);
906 st_engine_heartbeat_enable(data.engine);
907 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
911 intel_context_unpin(data.ce[i]);
912 intel_context_put(data.ce[i]);
915 if (igt_flush_test(gt->i915))
924 static struct i915_vma *
925 create_user_vma(struct i915_address_space *vm, unsigned long size)
927 struct drm_i915_gem_object *obj;
928 struct i915_vma *vma;
931 obj = i915_gem_object_create_internal(vm->i915, size);
933 return ERR_CAST(obj);
935 vma = i915_vma_instance(obj, vm, NULL);
937 i915_gem_object_put(obj);
941 err = i915_vma_pin(vma, 0, 0, PIN_USER);
943 i915_gem_object_put(obj);
950 static u32 safe_poison(u32 offset, u32 poison)
953 * Do not enable predication as it will nop all subsequent commands,
954 * not only disabling the tests (by preventing all the other SRM) but
955 * also preventing the arbitration events at the end of the request.
957 if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
958 poison &= ~REG_BIT(0);
963 static struct i915_vma *
964 store_context(struct intel_context *ce, struct i915_vma *scratch)
966 struct i915_vma *batch;
970 batch = create_user_vma(ce->vm, SZ_64K);
974 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
980 defaults = shmem_pin_map(ce->engine->default_state);
982 i915_gem_object_unpin_map(batch->obj);
984 return ERR_PTR(-ENOMEM);
990 hw += LRC_STATE_OFFSET / sizeof(*hw);
992 u32 len = hw[dw] & LRI_LENGTH_MASK;
995 * Keep it simple, skip parsing complex commands
997 * At present, there are no more MI_LOAD_REGISTER_IMM
998 * commands after the first 3D state command. Rather
999 * than include a table (see i915_cmd_parser.c) of all
1000 * the possible commands and their instruction lengths
1001 * (or mask for variable length instructions), assume
1002 * we have gathered the complete list of registers and
1005 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1013 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1014 /* Assume all other MI commands match LRI length mask */
1020 pr_err("%s: invalid LRI found in context image\n",
1022 igt_hexdump(defaults, PAGE_SIZE);
1027 len = (len + 1) / 2;
1029 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
1031 *cs++ = lower_32_bits(i915_vma_offset(scratch) + x);
1032 *cs++ = upper_32_bits(i915_vma_offset(scratch) + x);
1037 } while (dw < PAGE_SIZE / sizeof(u32) &&
1038 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1040 *cs++ = MI_BATCH_BUFFER_END;
1042 shmem_unpin_map(ce->engine->default_state, defaults);
1044 i915_gem_object_flush_map(batch->obj);
1045 i915_gem_object_unpin_map(batch->obj);
1050 static struct i915_request *
1051 record_registers(struct intel_context *ce,
1052 struct i915_vma *before,
1053 struct i915_vma *after,
1056 struct i915_vma *b_before, *b_after;
1057 struct i915_request *rq;
1061 b_before = store_context(ce, before);
1062 if (IS_ERR(b_before))
1063 return ERR_CAST(b_before);
1065 b_after = store_context(ce, after);
1066 if (IS_ERR(b_after)) {
1067 rq = ERR_CAST(b_after);
1071 rq = intel_context_create_request(ce);
1075 err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE);
1079 err = igt_vma_move_to_active_unlocked(b_before, rq, 0);
1083 err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE);
1087 err = igt_vma_move_to_active_unlocked(b_after, rq, 0);
1091 cs = intel_ring_begin(rq, 14);
1097 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1098 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1099 *cs++ = lower_32_bits(i915_vma_offset(b_before));
1100 *cs++ = upper_32_bits(i915_vma_offset(b_before));
1102 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1103 *cs++ = MI_SEMAPHORE_WAIT |
1104 MI_SEMAPHORE_GLOBAL_GTT |
1106 MI_SEMAPHORE_SAD_NEQ_SDD;
1108 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1109 offset_in_page(sema);
1113 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1114 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1115 *cs++ = lower_32_bits(i915_vma_offset(b_after));
1116 *cs++ = upper_32_bits(i915_vma_offset(b_after));
1118 intel_ring_advance(rq, cs);
1120 WRITE_ONCE(*sema, 0);
1121 i915_request_get(rq);
1122 i915_request_add(rq);
1124 i915_vma_put(b_after);
1126 i915_vma_put(b_before);
1130 i915_request_add(rq);
1135 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
1137 struct i915_vma *batch;
1141 batch = create_user_vma(ce->vm, SZ_64K);
1145 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1147 i915_vma_put(batch);
1148 return ERR_CAST(cs);
1151 defaults = shmem_pin_map(ce->engine->default_state);
1153 i915_gem_object_unpin_map(batch->obj);
1154 i915_vma_put(batch);
1155 return ERR_PTR(-ENOMEM);
1160 hw += LRC_STATE_OFFSET / sizeof(*hw);
1162 u32 len = hw[dw] & LRI_LENGTH_MASK;
1164 /* For simplicity, break parsing at the first complex command */
1165 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1173 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1179 pr_err("%s: invalid LRI found in context image\n",
1181 igt_hexdump(defaults, PAGE_SIZE);
1186 len = (len + 1) / 2;
1187 *cs++ = MI_LOAD_REGISTER_IMM(len);
1190 *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
1191 MI_LRI_LRM_CS_MMIO),
1195 } while (dw < PAGE_SIZE / sizeof(u32) &&
1196 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1198 *cs++ = MI_BATCH_BUFFER_END;
1200 shmem_unpin_map(ce->engine->default_state, defaults);
1202 i915_gem_object_flush_map(batch->obj);
1203 i915_gem_object_unpin_map(batch->obj);
1208 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
1210 struct i915_request *rq;
1211 struct i915_vma *batch;
1215 batch = load_context(ce, poison);
1217 return PTR_ERR(batch);
1219 rq = intel_context_create_request(ce);
1225 err = igt_vma_move_to_active_unlocked(batch, rq, 0);
1229 cs = intel_ring_begin(rq, 8);
1235 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1236 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1237 *cs++ = lower_32_bits(i915_vma_offset(batch));
1238 *cs++ = upper_32_bits(i915_vma_offset(batch));
1240 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1241 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1242 offset_in_page(sema);
1246 intel_ring_advance(rq, cs);
1248 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1250 i915_request_add(rq);
1252 i915_vma_put(batch);
1256 static bool is_moving(u32 a, u32 b)
1261 static int compare_isolation(struct intel_engine_cs *engine,
1262 struct i915_vma *ref[2],
1263 struct i915_vma *result[2],
1264 struct intel_context *ce,
1267 u32 x, dw, *hw, *lrc;
1272 A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
1274 return PTR_ERR(A[0]);
1276 A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
1278 err = PTR_ERR(A[1]);
1282 B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
1284 err = PTR_ERR(B[0]);
1288 B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
1290 err = PTR_ERR(B[1]);
1294 lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
1295 intel_gt_coherent_map_type(engine->gt,
1302 lrc += LRC_STATE_OFFSET / sizeof(*hw);
1304 defaults = shmem_pin_map(ce->engine->default_state);
1313 hw += LRC_STATE_OFFSET / sizeof(*hw);
1315 u32 len = hw[dw] & LRI_LENGTH_MASK;
1317 /* For simplicity, break parsing at the first complex command */
1318 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1326 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1332 pr_err("%s: invalid LRI found in context image\n",
1334 igt_hexdump(defaults, PAGE_SIZE);
1339 len = (len + 1) / 2;
1341 if (!is_moving(A[0][x], A[1][x]) &&
1342 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
1343 switch (hw[dw] & 4095) {
1344 case 0x30: /* RING_HEAD */
1345 case 0x34: /* RING_TAIL */
1349 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
1352 A[0][x], B[0][x], B[1][x],
1353 poison, lrc[dw + 1]);
1360 } while (dw < PAGE_SIZE / sizeof(u32) &&
1361 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1363 shmem_unpin_map(ce->engine->default_state, defaults);
1365 i915_gem_object_unpin_map(ce->state->obj);
1367 i915_gem_object_unpin_map(result[1]->obj);
1369 i915_gem_object_unpin_map(result[0]->obj);
1371 i915_gem_object_unpin_map(ref[1]->obj);
1373 i915_gem_object_unpin_map(ref[0]->obj);
1377 static struct i915_vma *
1378 create_result_vma(struct i915_address_space *vm, unsigned long sz)
1380 struct i915_vma *vma;
1383 vma = create_user_vma(vm, sz);
1387 /* Set the results to a known value distinct from the poison */
1388 ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
1391 return ERR_CAST(ptr);
1394 memset(ptr, POISON_INUSE, vma->size);
1395 i915_gem_object_flush_map(vma->obj);
1396 i915_gem_object_unpin_map(vma->obj);
1401 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
1403 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
1404 struct i915_vma *ref[2], *result[2];
1405 struct intel_context *A, *B;
1406 struct i915_request *rq;
1409 A = intel_context_create(engine);
1413 B = intel_context_create(engine);
1419 ref[0] = create_result_vma(A->vm, SZ_64K);
1420 if (IS_ERR(ref[0])) {
1421 err = PTR_ERR(ref[0]);
1425 ref[1] = create_result_vma(A->vm, SZ_64K);
1426 if (IS_ERR(ref[1])) {
1427 err = PTR_ERR(ref[1]);
1431 rq = record_registers(A, ref[0], ref[1], sema);
1437 WRITE_ONCE(*sema, 1);
1440 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1441 i915_request_put(rq);
1445 i915_request_put(rq);
1447 result[0] = create_result_vma(A->vm, SZ_64K);
1448 if (IS_ERR(result[0])) {
1449 err = PTR_ERR(result[0]);
1453 result[1] = create_result_vma(A->vm, SZ_64K);
1454 if (IS_ERR(result[1])) {
1455 err = PTR_ERR(result[1]);
1459 rq = record_registers(A, result[0], result[1], sema);
1465 err = poison_registers(B, poison, sema);
1466 if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
1467 pr_err("%s(%s): wait for results timed out\n",
1468 __func__, engine->name);
1472 /* Always cancel the semaphore wait, just in case the GPU gets stuck */
1473 WRITE_ONCE(*sema, -1);
1474 i915_request_put(rq);
1478 err = compare_isolation(engine, ref, result, A, poison);
1481 i915_vma_put(result[1]);
1483 i915_vma_put(result[0]);
1485 i915_vma_put(ref[1]);
1487 i915_vma_put(ref[0]);
1489 intel_context_put(B);
1491 intel_context_put(A);
1495 static bool skip_isolation(const struct intel_engine_cs *engine)
1497 if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1500 if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1506 static int live_lrc_isolation(void *arg)
1508 struct intel_gt *gt = arg;
1509 struct intel_engine_cs *engine;
1510 enum intel_engine_id id;
1511 const u32 poison[] = {
1521 * Our goal is to try and verify that per-context state cannot be
1522 * tampered with by another non-privileged client.
1524 * We take the list of context registers from the LRI in the default
1525 * context image and attempt to modify that list from a remote context.
1528 for_each_engine(engine, gt, id) {
1531 /* Just don't even ask */
1532 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1533 skip_isolation(engine))
1536 intel_engine_pm_get(engine);
1537 for (i = 0; i < ARRAY_SIZE(poison); i++) {
1540 result = __lrc_isolation(engine, poison[i]);
1544 result = __lrc_isolation(engine, ~poison[i]);
1548 intel_engine_pm_put(engine);
1549 if (igt_flush_test(gt->i915)) {
1558 static int wabb_ctx_submit_req(struct intel_context *ce)
1560 struct i915_request *rq;
1563 rq = intel_context_create_request(ce);
1567 i915_request_get(rq);
1568 i915_request_add(rq);
1570 if (i915_request_wait(rq, 0, HZ / 5) < 0)
1573 i915_request_put(rq);
1578 #define CTX_BB_CANARY_OFFSET (3 * 1024)
1579 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
1582 emit_wabb_ctx_canary(const struct intel_context *ce,
1583 u32 *cs, bool per_ctx)
1585 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1586 MI_SRM_LRM_GLOBAL_GTT |
1588 *cs++ = i915_mmio_reg_offset(RING_START(0));
1589 *cs++ = i915_ggtt_offset(ce->state) +
1590 context_wa_bb_offset(ce) +
1591 CTX_BB_CANARY_OFFSET +
1592 (per_ctx ? PAGE_SIZE : 0);
1599 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1601 return emit_wabb_ctx_canary(ce, cs, false);
1605 emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1607 return emit_wabb_ctx_canary(ce, cs, true);
1611 wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
1613 u32 *cs = context_wabb(ce, per_ctx);
1615 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1618 setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
1620 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1623 static bool check_ring_start(struct intel_context *ce, bool per_ctx)
1625 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1626 LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
1627 (per_ctx ? PAGE_SIZE : 0);
1629 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1632 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1633 ctx_bb[CTX_BB_CANARY_INDEX],
1634 ce->lrc_reg_state[CTX_RING_START]);
1639 static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
1643 err = wabb_ctx_submit_req(ce);
1647 if (!check_ring_start(ce, per_ctx))
1653 static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
1655 struct intel_context *a, *b;
1658 a = intel_context_create(engine);
1661 err = intel_context_pin(a);
1665 b = intel_context_create(engine);
1670 err = intel_context_pin(b);
1674 /* We use the already reserved extra page in context state */
1675 if (!a->wa_bb_page) {
1676 GEM_BUG_ON(b->wa_bb_page);
1677 GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1682 * In order to test that our per context bb is truly per context,
1683 * and executes at the intended spot in the context restore process,
1684 * make the batch store the ring start value to memory.
1685 * As ring start is restored prior to starting the indirect ctx bb and
1686 * as it will be different for each context, it fits to this purpose.
1688 wabb_ctx_setup(a, per_ctx);
1689 wabb_ctx_setup(b, per_ctx);
1691 err = wabb_ctx_check(a, per_ctx);
1695 err = wabb_ctx_check(b, per_ctx);
1698 intel_context_unpin(b);
1700 intel_context_put(b);
1702 intel_context_unpin(a);
1704 intel_context_put(a);
1709 static int lrc_wabb_ctx(void *arg, bool per_ctx)
1711 struct intel_gt *gt = arg;
1712 struct intel_engine_cs *engine;
1713 enum intel_engine_id id;
1716 for_each_engine(engine, gt, id) {
1717 intel_engine_pm_get(engine);
1718 err = __lrc_wabb_ctx(engine, per_ctx);
1719 intel_engine_pm_put(engine);
1721 if (igt_flush_test(gt->i915))
/*
 * Subtest entry point for the indirect context batch buffer.
 *
 * Forwards @arg unchanged to lrc_wabb_ctx() with the per-context flag
 * cleared and returns that function's result.
 */
static int live_lrc_indirect_ctx_bb(void *arg)
{
	const bool per_ctx = false;

	return lrc_wabb_ctx(arg, per_ctx);
}
/*
 * Subtest entry point for the per-context batch buffer.
 *
 * Forwards @arg unchanged to lrc_wabb_ctx() with the per-context flag set
 * and returns that function's result.
 */
static int live_lrc_per_ctx_bb(void *arg)
{
	const bool per_ctx = true;

	return lrc_wabb_ctx(arg, per_ctx);
}
1741 static void garbage_reset(struct intel_engine_cs *engine,
1742 struct i915_request *rq)
1744 const unsigned int bit = I915_RESET_ENGINE + engine->id;
1745 unsigned long *lock = &engine->gt->reset.flags;
1748 if (!test_and_set_bit(bit, lock)) {
1749 tasklet_disable(&engine->sched_engine->tasklet);
1751 if (!rq->fence.error)
1752 __intel_engine_reset_bh(engine, NULL);
1754 tasklet_enable(&engine->sched_engine->tasklet);
1755 clear_and_wake_up_bit(bit, lock);
1760 static struct i915_request *garbage(struct intel_context *ce,
1761 struct rnd_state *prng)
1763 struct i915_request *rq;
1766 err = intel_context_pin(ce);
1768 return ERR_PTR(err);
1770 prandom_bytes_state(prng,
1772 ce->engine->context_size -
1775 rq = intel_context_create_request(ce);
1781 i915_request_get(rq);
1782 i915_request_add(rq);
1786 intel_context_unpin(ce);
1787 return ERR_PTR(err);
1790 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1792 struct intel_context *ce;
1793 struct i915_request *hang;
1796 ce = intel_context_create(engine);
1800 hang = garbage(ce, prng);
1802 err = PTR_ERR(hang);
1806 if (wait_for_submit(engine, hang, HZ / 2)) {
1807 i915_request_put(hang);
1812 intel_context_set_banned(ce);
1813 garbage_reset(engine, hang);
1815 intel_engine_flush_submission(engine);
1816 if (!hang->fence.error) {
1817 i915_request_put(hang);
1818 pr_err("%s: corrupted context was not reset\n",
1824 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1825 pr_err("%s: corrupted context did not recover\n",
1827 i915_request_put(hang);
1831 i915_request_put(hang);
1834 intel_context_put(ce);
1838 static int live_lrc_garbage(void *arg)
1840 struct intel_gt *gt = arg;
1841 struct intel_engine_cs *engine;
1842 enum intel_engine_id id;
1845 * Verify that we can recover if one context state is completely
1849 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1852 for_each_engine(engine, gt, id) {
1853 I915_RND_STATE(prng);
1856 if (!intel_has_reset_engine(engine->gt))
1859 intel_engine_pm_get(engine);
1860 for (i = 0; i < 3; i++) {
1861 err = __lrc_garbage(engine, &prng);
1865 intel_engine_pm_put(engine);
1867 if (igt_flush_test(gt->i915))
1876 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1878 struct intel_context *ce;
1879 struct i915_request *rq;
1880 IGT_TIMEOUT(end_time);
1883 ce = intel_context_create(engine);
1887 ce->stats.runtime.num_underflow = 0;
1888 ce->stats.runtime.max_underflow = 0;
1891 unsigned int loop = 1024;
1894 rq = intel_context_create_request(ce);
1901 i915_request_get(rq);
1903 i915_request_add(rq);
1906 if (__igt_timeout(end_time, NULL))
1909 i915_request_put(rq);
1912 err = i915_request_wait(rq, 0, HZ / 5);
1914 pr_err("%s: request not completed!\n", engine->name);
1918 igt_flush_test(engine->i915);
1920 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1922 intel_context_get_total_runtime_ns(ce),
1923 intel_context_get_avg_runtime_ns(ce));
1926 if (ce->stats.runtime.num_underflow) {
1927 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1929 ce->stats.runtime.num_underflow,
1930 ce->stats.runtime.max_underflow);
1936 i915_request_put(rq);
1938 intel_context_put(ce);
1942 static int live_pphwsp_runtime(void *arg)
1944 struct intel_gt *gt = arg;
1945 struct intel_engine_cs *engine;
1946 enum intel_engine_id id;
1950 * Check that cumulative context runtime as stored in the pphwsp[16]
1954 for_each_engine(engine, gt, id) {
1955 err = __live_pphwsp_runtime(engine);
1960 if (igt_flush_test(gt->i915))
1966 int intel_lrc_live_selftests(struct drm_i915_private *i915)
1968 static const struct i915_subtest tests[] = {
1969 SUBTEST(live_lrc_layout),
1970 SUBTEST(live_lrc_fixed),
1971 SUBTEST(live_lrc_state),
1972 SUBTEST(live_lrc_gpr),
1973 SUBTEST(live_lrc_isolation),
1974 SUBTEST(live_lrc_timestamp),
1975 SUBTEST(live_lrc_garbage),
1976 SUBTEST(live_pphwsp_runtime),
1977 SUBTEST(live_lrc_indirect_ctx_bb),
1978 SUBTEST(live_lrc_per_ctx_bb),
1981 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1984 return intel_gt_live_subtests(tests, to_gt(i915));