// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_internal.h"

#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"
#include "shmem_utils.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

#define LRI_HEADER MI_INSTR(0x22, 0)
#define LRI_LENGTH_MASK GENMASK(7, 0)

static struct i915_vma *create_scratch(struct intel_gt *gt)
{
        return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
}

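/*
 * A request counts as "active" for wait_for_submit() once the backend
 * owns it: it is in the execution lists, parked on hold, or has already
 * written its initial breadcrumb to the ring.
 */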
static bool is_active(struct i915_request *rq)
{
        if (i915_request_is_active(rq))
                return true;

        if (i915_request_on_hold(rq))
                return true;

        if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
                return true;

        return false;
}

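/*
 * Kick the submission tasklet, then poll until either the request
 * completes or the HW acknowledges the submission (no pending execlists
 * writes and the request active), returning -ETIME once the timeout
 * expires.
 */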
static int wait_for_submit(struct intel_engine_cs *engine,
                           struct i915_request *rq,
                           unsigned long timeout)
{
        /* Ignore our own attempts to suppress excess tasklets */
        tasklet_hi_schedule(&engine->sched_engine->tasklet);

        timeout += jiffies;
        do {
                bool done = time_after(jiffies, timeout);

                if (i915_request_completed(rq)) /* that was quick! */
                        return 0;

                /* Wait until the HW has acknowledged the submission (or err) */
                intel_engine_flush_submission(engine);
                if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
                        return 0;

                if (done)
                        return -ETIME;

                cond_resched();
        } while (1);
}

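/*
 * Emit a barrier-priority request on @ce that writes 1 into the
 * semaphore @slot within the engine's status page, releasing any
 * MI_SEMAPHORE_WAIT polling on that dword.
 */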
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
        const u32 offset =
                i915_ggtt_offset(ce->engine->status_page.vma) +
                offset_in_page(slot);
        struct i915_request *rq;
        u32 *cs;

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs)) {
                i915_request_add(rq);
                return PTR_ERR(cs);
        }

        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
        *cs++ = offset;
        *cs++ = 0;
        *cs++ = 1;

        intel_ring_advance(rq, cs);

        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
        i915_request_add(rq);
        return 0;
}

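/*
 * Queue a kernel-context request behind the last request on @ce's
 * timeline and wait for it, ensuring @ce has been switched out and its
 * context image written back to memory.
 */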
static int context_flush(struct intel_context *ce, long timeout)
{
        struct i915_request *rq;
        struct dma_fence *fence;
        int err = 0;

        rq = intel_engine_create_kernel_request(ce->engine);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        fence = i915_active_fence_get(&ce->timeline->last_request);
        if (fence) {
                i915_request_await_dma_fence(rq, fence);
                dma_fence_put(fence);
        }

        rq = i915_request_get(rq);
        i915_request_add(rq);
        if (i915_request_wait(rq, 0, timeout) < 0)
                err = -ETIME;
        i915_request_put(rq);

        rmb(); /* We know the request is written, make sure all state is too! */
        return err;
}

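/*
 * With MI_LRI_LRM_CS_MMIO set, the HW compares only the low bits of
 * each register offset (relative to the engine's mmio base); how many
 * bits are valid depends on platform and engine class. Without the
 * flag, the full offset is significant.
 */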
static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
{
        if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
                return ~0u;

        if (GRAPHICS_VER(engine->i915) < 12)
                return 0xfff;

        switch (engine->class) {
        default:
        case RENDER_CLASS:
        case COMPUTE_CLASS:
                return 0x07ff;
        case COPY_ENGINE_CLASS:
                return 0x0fff;
        case VIDEO_DECODE_CLASS:
        case VIDEO_ENHANCEMENT_CLASS:
                return 0x3fff;
        }
}

static int live_lrc_layout(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 *lrc;
        int err;

        /*
         * Check the register offsets we use to create the initial reg state
         * match the layout saved by HW.
         */

        lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
        if (!lrc)
                return -ENOMEM;
        GEM_BUG_ON(offset_in_page(lrc));

        err = 0;
        for_each_engine(engine, gt, id) {
                u32 *hw;
                int dw;

                if (!engine->default_state)
                        continue;

                hw = shmem_pin_map(engine->default_state);
                if (!hw) {
                        err = -ENOMEM;
                        break;
                }
                hw += LRC_STATE_OFFSET / sizeof(*hw);

                __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
                                engine->kernel_context, engine, true);

                dw = 0;
                do {
                        u32 lri = READ_ONCE(hw[dw]);
                        u32 lri_mask;

                        if (lri == 0) {
                                dw++;
                                continue;
                        }

                        if (lrc[dw] == 0) {
                                pr_debug("%s: skipped instruction %x at dword %d\n",
                                         engine->name, lri, dw);
                                dw++;
                                continue;
                        }

                        if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
                                pr_err("%s: Expected LRI command at dword %d, found %08x\n",
                                       engine->name, dw, lri);
                                err = -EINVAL;
                                break;
                        }

                        if (lrc[dw] != lri) {
                                pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
                                       engine->name, dw, lri, lrc[dw]);
                                err = -EINVAL;
                                break;
                        }

                        /*
                         * When bit 19 of the MI_LOAD_REGISTER_IMM instruction
                         * opcode is set on Gen12+ devices, HW does not
                         * care about certain register address offsets, and
                         * instead checks the following for valid address
                         * ranges on specific engines:
                         * RCS && CCS: BITS(0 - 10)
                         * BCS: BITS(0 - 11)
                         * VECS && VCS: BITS(0 - 13)
                         */
                        lri_mask = get_lri_mask(engine, lri);

                        lri &= 0x7f;
                        lri++;
                        dw++;

                        while (lri) {
                                u32 offset = READ_ONCE(hw[dw]);

                                if ((offset ^ lrc[dw]) & lri_mask) {
                                        pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
                                               engine->name, dw, offset, lrc[dw]);
                                        err = -EINVAL;
                                        break;
                                }

                                /*
                                 * Skip over the actual register value as we
                                 * expect that to differ.
                                 */
                                dw += 2;
                                lri -= 2;
                        }
                } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

                if (err) {
                        pr_info("%s: HW register image:\n", engine->name);
                        igt_hexdump(hw, PAGE_SIZE);

                        pr_info("%s: SW register image:\n", engine->name);
                        igt_hexdump(lrc, PAGE_SIZE);
                }

                shmem_unpin_map(engine->default_state, hw);
                if (err)
                        break;
        }

        free_page((unsigned long)lrc);
        return err;
}

static int find_offset(const u32 *lri, u32 offset)
{
        int i;

        for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
                if (lri[i] == offset)
                        return i;

        return -1;
}

static int live_lrc_fixed(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Check the assumed register offsets match the actual locations in
         * the context image.
         */

        for_each_engine(engine, gt, id) {
                const struct {
                        u32 reg;
                        u32 offset;
                        const char *name;
                } tbl[] = {
                        {
                                i915_mmio_reg_offset(RING_START(engine->mmio_base)),
                                CTX_RING_START - 1,
                                "RING_START"
                        },
                        {
                                i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
                                CTX_RING_CTL - 1,
                                "RING_CTL"
                        },
                        {
                                i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
                                CTX_RING_HEAD - 1,
                                "RING_HEAD"
                        },
                        {
                                i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
                                CTX_RING_TAIL - 1,
                                "RING_TAIL"
                        },
                        {
                                i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
                                lrc_ring_mi_mode(engine),
                                "RING_MI_MODE"
                        },
                        {
                                i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
                                CTX_BB_STATE - 1,
                                "BB_STATE"
                        },
                        {
                                i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
                                lrc_ring_wa_bb_per_ctx(engine),
                                "RING_BB_PER_CTX_PTR"
                        },
                        {
                                i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
                                lrc_ring_indirect_ptr(engine),
                                "RING_INDIRECT_CTX_PTR"
                        },
                        {
                                i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
                                lrc_ring_indirect_offset(engine),
                                "RING_INDIRECT_CTX_OFFSET"
                        },
                        {
                                i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
                                CTX_TIMESTAMP - 1,
                                "RING_CTX_TIMESTAMP"
                        },
                        {
                                i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
                                lrc_ring_gpr0(engine),
                                "RING_CS_GPR0"
                        },
                        {
                                i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
                                lrc_ring_cmd_buf_cctl(engine),
                                "RING_CMD_BUF_CCTL"
                        },
                        {
                                i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
                                lrc_ring_bb_offset(engine),
                                "RING_BB_OFFSET"
                        },
                        { },
                }, *t;
                u32 *hw;

                if (!engine->default_state)
                        continue;

                hw = shmem_pin_map(engine->default_state);
                if (!hw) {
                        err = -ENOMEM;
                        break;
                }
                hw += LRC_STATE_OFFSET / sizeof(*hw);

                for (t = tbl; t->name; t++) {
                        int dw = find_offset(hw, t->reg);

                        if (dw != t->offset) {
                                pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
                                       engine->name,
                                       t->name,
                                       t->reg,
                                       dw,
                                       t->offset);
                                err = -EINVAL;
                        }
                }

                shmem_unpin_map(engine->default_state, hw);
        }

        return err;
}

static int __live_lrc_state(struct intel_engine_cs *engine,
                            struct i915_vma *scratch)
{
        struct intel_context *ce;
        struct i915_request *rq;
        struct i915_gem_ww_ctx ww;
        enum {
                RING_START_IDX = 0,
                RING_TAIL_IDX,
                MAX_IDX
        };
        u32 expected[MAX_IDX];
        u32 *cs;
        int err;
        int n;

        ce = intel_context_create(engine);
        if (IS_ERR(ce))
                return PTR_ERR(ce);

        i915_gem_ww_ctx_init(&ww, false);
retry:
        err = i915_gem_object_lock(scratch->obj, &ww);
        if (!err)
                err = intel_context_pin_ww(ce, &ww);
        if (err)
                goto err_put;

        rq = i915_request_create(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_unpin;
        }

        cs = intel_ring_begin(rq, 4 * MAX_IDX);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                i915_request_add(rq);
                goto err_unpin;
        }

        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
        *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
        *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
        *cs++ = 0;

        expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
        *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
        *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
        *cs++ = 0;

        err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);

        i915_request_get(rq);
        i915_request_add(rq);
        if (err)
                goto err_rq;

        intel_engine_flush_submission(engine);
        expected[RING_TAIL_IDX] = ce->ring->tail;

        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                err = -ETIME;
                goto err_rq;
        }

        cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err_rq;
        }

        for (n = 0; n < MAX_IDX; n++) {
                if (cs[n] != expected[n]) {
                        pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
                               engine->name, n, cs[n], expected[n]);
                        err = -EINVAL;
                        break;
                }
        }

        i915_gem_object_unpin_map(scratch->obj);

err_rq:
        i915_request_put(rq);
err_unpin:
        intel_context_unpin(ce);
err_put:
        if (err == -EDEADLK) {
                err = i915_gem_ww_ctx_backoff(&ww);
                if (!err)
                        goto retry;
        }
        i915_gem_ww_ctx_fini(&ww);
        intel_context_put(ce);
        return err;
}

static int live_lrc_state(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        struct i915_vma *scratch;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Check the live register state matches what we expect for this
         * intel_context.
         */

        scratch = create_scratch(gt);
        if (IS_ERR(scratch))
                return PTR_ERR(scratch);

        for_each_engine(engine, gt, id) {
                err = __live_lrc_state(engine, scratch);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        i915_vma_unpin_and_release(&scratch, 0);
        return err;
}

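/*
 * Fill all of the engine's CS_GPR registers with a non-zero value
 * (STACK_MAGIC) from @ce, so that a subsequent context can be checked
 * for leaks of this state.
 */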
static int gpr_make_dirty(struct intel_context *ce)
{
        struct i915_request *rq;
        u32 *cs;
        int n;

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
        if (IS_ERR(cs)) {
                i915_request_add(rq);
                return PTR_ERR(cs);
        }

        *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
        for (n = 0; n < NUM_GPR_DW; n++) {
                *cs++ = CS_GPR(ce->engine, n);
                *cs++ = STACK_MAGIC;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
        i915_request_add(rq);

        return 0;
}

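/*
 * Build a request on @ce that spins on a semaphore in the status page
 * (with arbitration enabled, so it may be preempted while waiting) and
 * then stores all CS_GPR registers into @scratch for inspection.
 */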
static struct i915_request *
__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
{
        const u32 offset =
                i915_ggtt_offset(ce->engine->status_page.vma) +
                offset_in_page(slot);
        struct i915_request *rq;
        u32 *cs;
        int err;
        int n;

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq))
                return rq;

        cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
        if (IS_ERR(cs)) {
                i915_request_add(rq);
                return ERR_CAST(cs);
        }

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        *cs++ = MI_NOOP;

        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_NEQ_SDD;
        *cs++ = 0;
        *cs++ = offset;
        *cs++ = 0;

        for (n = 0; n < NUM_GPR_DW; n++) {
                *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
                *cs++ = CS_GPR(ce->engine, n);
                *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
                *cs++ = 0;
        }

        err = igt_vma_move_to_active_unlocked(scratch, rq, EXEC_OBJECT_WRITE);

        i915_request_get(rq);
        i915_request_add(rq);
        if (err) {
                i915_request_put(rq);
                rq = ERR_PTR(err);
        }

        return rq;
}

static int __live_lrc_gpr(struct intel_engine_cs *engine,
                          struct i915_vma *scratch,
                          bool preempt)
{
        u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
        struct intel_context *ce;
        struct i915_request *rq;
        u32 *cs;
        int err;
        int n;

        if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
                return 0; /* GPR only on rcs0 for gen8 */

        err = gpr_make_dirty(engine->kernel_context);
        if (err)
                return err;

        ce = intel_context_create(engine);
        if (IS_ERR(ce))
                return PTR_ERR(ce);

        rq = __gpr_read(ce, scratch, slot);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_put;
        }

        err = wait_for_submit(engine, rq, HZ / 2);
        if (err)
                goto err_rq;

        if (preempt) {
                err = gpr_make_dirty(engine->kernel_context);
                if (err)
                        goto err_rq;

                err = emit_semaphore_signal(engine->kernel_context, slot);
                if (err)
                        goto err_rq;

                err = wait_for_submit(engine, rq, HZ / 2);
                if (err)
                        goto err_rq;
        } else {
                slot[0] = 1;
                wmb();
        }

        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                err = -ETIME;
                goto err_rq;
        }

        cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err_rq;
        }

        for (n = 0; n < NUM_GPR_DW; n++) {
                if (cs[n]) {
                        pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
                               engine->name,
                               n / 2, n & 1 ? "udw" : "ldw",
                               cs[n]);
                        err = -EINVAL;
                        break;
                }
        }

        i915_gem_object_unpin_map(scratch->obj);

err_rq:
        memset32(&slot[0], -1, 4);
        wmb();
        i915_request_put(rq);
err_put:
        intel_context_put(ce);
        return err;
}

static int live_lrc_gpr(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        struct i915_vma *scratch;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Check that GPR registers are cleared in new contexts as we need
         * to avoid leaking any information from previous contexts.
         */

        scratch = create_scratch(gt);
        if (IS_ERR(scratch))
                return PTR_ERR(scratch);

        for_each_engine(engine, gt, id) {
                st_engine_heartbeat_disable(engine);

                err = __live_lrc_gpr(engine, scratch, false);
                if (err)
                        goto err;

                err = __live_lrc_gpr(engine, scratch, true);
                if (err)
                        goto err;

err:
                st_engine_heartbeat_enable(engine);
                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        break;
        }

        i915_vma_unpin_and_release(&scratch, 0);
        return err;
}

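/*
 * Build a request on @ce that waits on a semaphore in the status page
 * and then stores the engine's CTX_TIMESTAMP register into dword @idx
 * of that same page.
 */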
static struct i915_request *
create_timestamp(struct intel_context *ce, void *slot, int idx)
{
        const u32 offset =
                i915_ggtt_offset(ce->engine->status_page.vma) +
                offset_in_page(slot);
        struct i915_request *rq;
        u32 *cs;
        int err;

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq))
                return rq;

        cs = intel_ring_begin(rq, 10);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err;
        }

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        *cs++ = MI_NOOP;

        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_NEQ_SDD;
        *cs++ = 0;
        *cs++ = offset;
        *cs++ = 0;

        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
        *cs++ = offset + idx * sizeof(u32);
        *cs++ = 0;

        intel_ring_advance(rq, cs);

        err = 0;
err:
        i915_request_get(rq);
        i915_request_add(rq);
        if (err) {
                i915_request_put(rq);
                return ERR_PTR(err);
        }

        return rq;
}

struct lrc_timestamp {
        struct intel_engine_cs *engine;
        struct intel_context *ce[2];
        u32 poison;
};

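/* The timestamp wraps around, so compare using signed distance. */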
static bool timestamp_advanced(u32 start, u32 end)
{
        return (s32)(end - start) > 0;
}

static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
        u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
        struct i915_request *rq;
        u32 timestamp;
        int err = 0;

        arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
        rq = create_timestamp(arg->ce[0], slot, 1);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        err = wait_for_submit(rq->engine, rq, HZ / 2);
        if (err)
                goto err;

        if (preempt) {
                arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
                err = emit_semaphore_signal(arg->ce[1], slot);
                if (err)
                        goto err;
        } else {
                slot[0] = 1;
                wmb();
        }

        /* And wait for switch to kernel (to save our context to memory) */
        err = context_flush(arg->ce[0], HZ / 2);
        if (err)
                goto err;

        if (!timestamp_advanced(arg->poison, slot[1])) {
                pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
                       arg->engine->name, preempt ? "preempt" : "simple",
                       arg->poison, slot[1]);
                err = -EINVAL;
        }

        timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
        if (!timestamp_advanced(slot[1], timestamp)) {
                pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
                       arg->engine->name, preempt ? "preempt" : "simple",
                       slot[1], timestamp);
                err = -EINVAL;
        }

err:
        memset32(slot, -1, 4);
        i915_request_put(rq);
        return err;
}

static int live_lrc_timestamp(void *arg)
{
        struct lrc_timestamp data = {};
        struct intel_gt *gt = arg;
        enum intel_engine_id id;
        const u32 poison[] = {
                0,
                S32_MAX,
                (u32)S32_MAX + 1,
                U32_MAX,
        };

        /*
         * We want to verify that the timestamp is saved and restored across
         * context switches and is monotonic.
         *
         * So we do this with a little bit of LRC poisoning to check various
         * boundary conditions, and see what happens if we preempt the context
         * with a second request (carrying more poison into the timestamp).
         */

        for_each_engine(data.engine, gt, id) {
                int i, err = 0;

                st_engine_heartbeat_disable(data.engine);

                for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
                        struct intel_context *tmp;

                        tmp = intel_context_create(data.engine);
                        if (IS_ERR(tmp)) {
                                err = PTR_ERR(tmp);
                                goto err;
                        }

                        err = intel_context_pin(tmp);
                        if (err) {
                                intel_context_put(tmp);
                                goto err;
                        }

                        data.ce[i] = tmp;
                }

                for (i = 0; i < ARRAY_SIZE(poison); i++) {
                        data.poison = poison[i];

                        err = __lrc_timestamp(&data, false);
                        if (err)
                                break;

                        err = __lrc_timestamp(&data, true);
                        if (err)
                                break;
                }

err:
                st_engine_heartbeat_enable(data.engine);
                for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
                        if (!data.ce[i])
                                break;

                        intel_context_unpin(data.ce[i]);
                        intel_context_put(data.ce[i]);
                }

                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        return err;
        }

        return 0;
}

static struct i915_vma *
create_user_vma(struct i915_address_space *vm, unsigned long size)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        int err;

        obj = i915_gem_object_create_internal(vm->i915, size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                i915_gem_object_put(obj);
                return vma;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err) {
                i915_gem_object_put(obj);
                return ERR_PTR(err);
        }

        return vma;
}

static u32 safe_poison(u32 offset, u32 poison)
{
        /*
         * Do not enable predication as it will nop all subsequent commands,
         * not only disabling the tests (by preventing all the other SRM) but
         * also preventing the arbitration events at the end of the request.
         */
        if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
                poison &= ~REG_BIT(0);

        return poison;
}

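/*
 * Build a user batch that walks the LRI list in the engine's default
 * context image and emits an SRM for every register named there,
 * writing the live values sequentially into @scratch.
 */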
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
        struct i915_vma *batch;
        u32 dw, x, *cs, *hw;
        u32 *defaults;

        batch = create_user_vma(ce->vm, SZ_64K);
        if (IS_ERR(batch))
                return batch;

        cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
        if (IS_ERR(cs)) {
                i915_vma_put(batch);
                return ERR_CAST(cs);
        }

        defaults = shmem_pin_map(ce->engine->default_state);
        if (!defaults) {
                i915_gem_object_unpin_map(batch->obj);
                i915_vma_put(batch);
                return ERR_PTR(-ENOMEM);
        }

        x = 0;
        dw = 0;
        hw = defaults;
        hw += LRC_STATE_OFFSET / sizeof(*hw);
        do {
                u32 len = hw[dw] & LRI_LENGTH_MASK;

                /*
                 * Keep it simple, skip parsing complex commands
                 *
                 * At present, there are no more MI_LOAD_REGISTER_IMM
                 * commands after the first 3D state command. Rather
                 * than include a table (see i915_cmd_parser.c) of all
                 * the possible commands and their instruction lengths
                 * (or mask for variable length instructions), assume
                 * we have gathered the complete list of registers and
                 * bail out.
                 */
                if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
                        break;

                if (hw[dw] == 0) {
                        dw++;
                        continue;
                }

                if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
                        /* Assume all other MI commands match LRI length mask */
                        dw += len + 2;
                        continue;
                }

                if (!len) {
                        pr_err("%s: invalid LRI found in context image\n",
                               ce->engine->name);
                        igt_hexdump(defaults, PAGE_SIZE);
                        break;
                }

                dw++;
                len = (len + 1) / 2;
                while (len--) {
                        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
                        *cs++ = hw[dw];
                        *cs++ = lower_32_bits(i915_vma_offset(scratch) + x);
                        *cs++ = upper_32_bits(i915_vma_offset(scratch) + x);

                        dw += 2;
                        x += 4;
                }
        } while (dw < PAGE_SIZE / sizeof(u32) &&
                 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

        *cs++ = MI_BATCH_BUFFER_END;

        shmem_unpin_map(ce->engine->default_state, defaults);

        i915_gem_object_flush_map(batch->obj);
        i915_gem_object_unpin_map(batch->obj);

        return batch;
}

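/*
 * Submit a request on @ce that samples the context registers into
 * @before, waits on @sema with arbitration enabled (so that a rival
 * context may run), and then samples the same registers into @after.
 */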
static struct i915_request *
record_registers(struct intel_context *ce,
                 struct i915_vma *before,
                 struct i915_vma *after,
                 u32 *sema)
{
        struct i915_vma *b_before, *b_after;
        struct i915_request *rq;
        u32 *cs;
        int err;

        b_before = store_context(ce, before);
        if (IS_ERR(b_before))
                return ERR_CAST(b_before);

        b_after = store_context(ce, after);
        if (IS_ERR(b_after)) {
                rq = ERR_CAST(b_after);
                goto err_before;
        }

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq))
                goto err_after;

        err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE);
        if (err)
                goto err_rq;

        err = igt_vma_move_to_active_unlocked(b_before, rq, 0);
        if (err)
                goto err_rq;

        err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE);
        if (err)
                goto err_rq;

        err = igt_vma_move_to_active_unlocked(b_after, rq, 0);
        if (err)
                goto err_rq;

        cs = intel_ring_begin(rq, 14);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err_rq;
        }

        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
        *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
        *cs++ = lower_32_bits(i915_vma_offset(b_before));
        *cs++ = upper_32_bits(i915_vma_offset(b_before));

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_NEQ_SDD;
        *cs++ = 0;
        *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
                offset_in_page(sema);
        *cs++ = 0;
        *cs++ = MI_NOOP;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
        *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
        *cs++ = lower_32_bits(i915_vma_offset(b_after));
        *cs++ = upper_32_bits(i915_vma_offset(b_after));

        intel_ring_advance(rq, cs);

        WRITE_ONCE(*sema, 0);
        i915_request_get(rq);
        i915_request_add(rq);
err_after:
        i915_vma_put(b_after);
err_before:
        i915_vma_put(b_before);
        return rq;

err_rq:
        i915_request_add(rq);
        rq = ERR_PTR(err);
        goto err_after;
}

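/*
 * Build a user batch that replays the LRI list from the engine's
 * default context image, loading @poison (filtered by safe_poison())
 * into every register named there.
 */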
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
        struct i915_vma *batch;
        u32 dw, *cs, *hw;
        u32 *defaults;

        batch = create_user_vma(ce->vm, SZ_64K);
        if (IS_ERR(batch))
                return batch;

        cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
        if (IS_ERR(cs)) {
                i915_vma_put(batch);
                return ERR_CAST(cs);
        }

        defaults = shmem_pin_map(ce->engine->default_state);
        if (!defaults) {
                i915_gem_object_unpin_map(batch->obj);
                i915_vma_put(batch);
                return ERR_PTR(-ENOMEM);
        }

        dw = 0;
        hw = defaults;
        hw += LRC_STATE_OFFSET / sizeof(*hw);
        do {
                u32 len = hw[dw] & LRI_LENGTH_MASK;

                /* For simplicity, break parsing at the first complex command */
                if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
                        break;

                if (hw[dw] == 0) {
                        dw++;
                        continue;
                }

                if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
                        dw += len + 2;
                        continue;
                }

                if (!len) {
                        pr_err("%s: invalid LRI found in context image\n",
                               ce->engine->name);
                        igt_hexdump(defaults, PAGE_SIZE);
                        break;
                }

                dw++;
                len = (len + 1) / 2;
                *cs++ = MI_LOAD_REGISTER_IMM(len);
                while (len--) {
                        *cs++ = hw[dw];
                        *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
                                                                  MI_LRI_LRM_CS_MMIO),
                                            poison);
                        dw += 2;
                }
        } while (dw < PAGE_SIZE / sizeof(u32) &&
                 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

        *cs++ = MI_BATCH_BUFFER_END;

        shmem_unpin_map(ce->engine->default_state, defaults);

        i915_gem_object_flush_map(batch->obj);
        i915_gem_object_unpin_map(batch->obj);

        return batch;
}

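/*
 * Run the poisoning batch from load_context() on @ce with arbitration
 * disabled, then write 1 into @sema so that the victim context resumes
 * and samples its registers.
 */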
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
        struct i915_request *rq;
        struct i915_vma *batch;
        u32 *cs;
        int err;

        batch = load_context(ce, poison);
        if (IS_ERR(batch))
                return PTR_ERR(batch);

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_batch;
        }

        err = igt_vma_move_to_active_unlocked(batch, rq, 0);
        if (err)
                goto err_rq;

        cs = intel_ring_begin(rq, 8);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err_rq;
        }

        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
        *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
        *cs++ = lower_32_bits(i915_vma_offset(batch));
        *cs++ = upper_32_bits(i915_vma_offset(batch));

        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
        *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
                offset_in_page(sema);
        *cs++ = 0;
        *cs++ = 1;

        intel_ring_advance(rq, cs);

        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
        i915_request_add(rq);
err_batch:
        i915_vma_put(batch);
        return err;
}

static bool is_moving(u32 a, u32 b)
{
        return a != b;
}

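/*
 * Compare the registers sampled before/after the poisoned run (@result)
 * against the undisturbed reference run (@ref): any register that was
 * stable in the reference but changed across the poisoning (other than
 * RING_HEAD/RING_TAIL) has leaked in from the other context.
 */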
static int compare_isolation(struct intel_engine_cs *engine,
                             struct i915_vma *ref[2],
                             struct i915_vma *result[2],
                             struct intel_context *ce,
                             u32 poison)
{
        u32 x, dw, *hw, *lrc;
        u32 *A[2], *B[2];
        u32 *defaults;
        int err = 0;

        A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
        if (IS_ERR(A[0]))
                return PTR_ERR(A[0]);

        A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
        if (IS_ERR(A[1])) {
                err = PTR_ERR(A[1]);
                goto err_A0;
        }

        B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
        if (IS_ERR(B[0])) {
                err = PTR_ERR(B[0]);
                goto err_A1;
        }

        B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
        if (IS_ERR(B[1])) {
                err = PTR_ERR(B[1]);
                goto err_B0;
        }

        lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
                                               intel_gt_coherent_map_type(engine->gt,
                                                                          ce->state->obj,
                                                                          false));
        if (IS_ERR(lrc)) {
                err = PTR_ERR(lrc);
                goto err_B1;
        }
        lrc += LRC_STATE_OFFSET / sizeof(*hw);

        defaults = shmem_pin_map(ce->engine->default_state);
        if (!defaults) {
                err = -ENOMEM;
                goto err_lrc;
        }

        x = 0;
        dw = 0;
        hw = defaults;
        hw += LRC_STATE_OFFSET / sizeof(*hw);
        do {
                u32 len = hw[dw] & LRI_LENGTH_MASK;

                /* For simplicity, break parsing at the first complex command */
                if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
                        break;

                if (hw[dw] == 0) {
                        dw++;
                        continue;
                }

                if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
                        dw += len + 2;
                        continue;
                }

                if (!len) {
                        pr_err("%s: invalid LRI found in context image\n",
                               engine->name);
                        igt_hexdump(defaults, PAGE_SIZE);
                        break;
                }

                dw++;
                len = (len + 1) / 2;
                while (len--) {
                        if (!is_moving(A[0][x], A[1][x]) &&
                            (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
                                switch (hw[dw] & 4095) {
                                case 0x30: /* RING_HEAD */
                                case 0x34: /* RING_TAIL */
                                        break;

                                default:
                                        pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
                                               engine->name, dw,
                                               hw[dw], hw[dw + 1],
                                               A[0][x], B[0][x], B[1][x],
                                               poison, lrc[dw + 1]);
                                        err = -EINVAL;
                                }
                        }
                        dw += 2;
                        x++;
                }
        } while (dw < PAGE_SIZE / sizeof(u32) &&
                 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

        shmem_unpin_map(ce->engine->default_state, defaults);
err_lrc:
        i915_gem_object_unpin_map(ce->state->obj);
err_B1:
        i915_gem_object_unpin_map(result[1]->obj);
err_B0:
        i915_gem_object_unpin_map(result[0]->obj);
err_A1:
        i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
        i915_gem_object_unpin_map(ref[0]->obj);
        return err;
}

static struct i915_vma *
create_result_vma(struct i915_address_space *vm, unsigned long sz)
{
        struct i915_vma *vma;
        void *ptr;

        vma = create_user_vma(vm, sz);
        if (IS_ERR(vma))
                return vma;

        /* Set the results to a known value distinct from the poison */
        ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
        if (IS_ERR(ptr)) {
                i915_vma_put(vma);
                return ERR_CAST(ptr);
        }

        memset(ptr, POISON_INUSE, vma->size);
        i915_gem_object_flush_map(vma->obj);
        i915_gem_object_unpin_map(vma->obj);

        return vma;
}

static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
        u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
        struct i915_vma *ref[2], *result[2];
        struct intel_context *A, *B;
        struct i915_request *rq;
        int err;

        A = intel_context_create(engine);
        if (IS_ERR(A))
                return PTR_ERR(A);

        B = intel_context_create(engine);
        if (IS_ERR(B)) {
                err = PTR_ERR(B);
                goto err_A;
        }

        ref[0] = create_result_vma(A->vm, SZ_64K);
        if (IS_ERR(ref[0])) {
                err = PTR_ERR(ref[0]);
                goto err_B;
        }

        ref[1] = create_result_vma(A->vm, SZ_64K);
        if (IS_ERR(ref[1])) {
                err = PTR_ERR(ref[1]);
                goto err_ref0;
        }

        rq = record_registers(A, ref[0], ref[1], sema);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_ref1;
        }

        WRITE_ONCE(*sema, 1);
        wmb();

        if (i915_request_wait(rq, 0, HZ / 2) < 0) {
                i915_request_put(rq);
                err = -ETIME;
                goto err_ref1;
        }
        i915_request_put(rq);

        result[0] = create_result_vma(A->vm, SZ_64K);
        if (IS_ERR(result[0])) {
                err = PTR_ERR(result[0]);
                goto err_ref1;
        }

        result[1] = create_result_vma(A->vm, SZ_64K);
        if (IS_ERR(result[1])) {
                err = PTR_ERR(result[1]);
                goto err_result0;
        }

        rq = record_registers(A, result[0], result[1], sema);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_result1;
        }

        err = poison_registers(B, poison, sema);
        if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
                pr_err("%s(%s): wait for results timed out\n",
                       __func__, engine->name);
                err = -ETIME;
        }

        /* Always cancel the semaphore wait, just in case the GPU gets stuck */
        WRITE_ONCE(*sema, -1);
        i915_request_put(rq);
        if (err)
                goto err_result1;

        err = compare_isolation(engine, ref, result, A, poison);

err_result1:
        i915_vma_put(result[1]);
err_result0:
        i915_vma_put(result[0]);
err_ref1:
        i915_vma_put(ref[1]);
err_ref0:
        i915_vma_put(ref[0]);
err_B:
        intel_context_put(B);
err_A:
        intel_context_put(A);
        return err;
}

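/*
 * Engines with known isolation failures: gen9 blitter and gen11 render.
 * These are skipped unless CONFIG_DRM_I915_SELFTEST_BROKEN is set.
 */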
static bool skip_isolation(const struct intel_engine_cs *engine)
{
        if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
                return true;

        if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
                return true;

        return false;
}

static int live_lrc_isolation(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        const u32 poison[] = {
                STACK_MAGIC,
                0x3a3a3a3a,
                0x5c5c5c5c,
                0xffffffff,
                0xffff0000,
        };
        int err = 0;

        /*
         * Our goal is to try to verify that per-context state cannot be
         * tampered with by another non-privileged client.
         *
         * We take the list of context registers from the LRI in the default
         * context image and attempt to modify that list from a remote context.
         */
1527
1528         for_each_engine(engine, gt, id) {
1529                 int i;
1530
1531                 /* Just don't even ask */
1532                 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1533                     skip_isolation(engine))
1534                         continue;
1535
1536                 intel_engine_pm_get(engine);
1537                 for (i = 0; i < ARRAY_SIZE(poison); i++) {
1538                         int result;
1539
1540                         result = __lrc_isolation(engine, poison[i]);
1541                         if (result && !err)
1542                                 err = result;
1543
1544                         result = __lrc_isolation(engine, ~poison[i]);
1545                         if (result && !err)
1546                                 err = result;
1547                 }
1548                 intel_engine_pm_put(engine);
1549                 if (igt_flush_test(gt->i915)) {
1550                         err = -EIO;
1551                         break;
1552                 }
1553         }
1554
1555         return err;
1556 }
1557
1558 static int wabb_ctx_submit_req(struct intel_context *ce)
1559 {
1560         struct i915_request *rq;
1561         int err = 0;
1562
1563         rq = intel_context_create_request(ce);
1564         if (IS_ERR(rq))
1565                 return PTR_ERR(rq);
1566
1567         i915_request_get(rq);
1568         i915_request_add(rq);
1569
1570         if (i915_request_wait(rq, 0, HZ / 5) < 0)
1571                 err = -ETIME;
1572
1573         i915_request_put(rq);
1574
1575         return err;
1576 }
1577
1578 #define CTX_BB_CANARY_OFFSET (3 * 1024)
1579 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
1580
1581 static u32 *
1582 emit_wabb_ctx_canary(const struct intel_context *ce,
1583                      u32 *cs, bool per_ctx)
1584 {
1585         *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1586                 MI_SRM_LRM_GLOBAL_GTT |
1587                 MI_LRI_LRM_CS_MMIO;
1588         *cs++ = i915_mmio_reg_offset(RING_START(0));
1589         *cs++ = i915_ggtt_offset(ce->state) +
1590                 context_wa_bb_offset(ce) +
1591                 CTX_BB_CANARY_OFFSET +
1592                 (per_ctx ? PAGE_SIZE : 0);
1593         *cs++ = 0;
1594
1595         return cs;
1596 }
1597
1598 static u32 *
1599 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1600 {
1601         return emit_wabb_ctx_canary(ce, cs, false);
1602 }
1603
1604 static u32 *
1605 emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1606 {
1607         return emit_wabb_ctx_canary(ce, cs, true);
1608 }
1609
1610 static void
1611 wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
1612 {
1613         u32 *cs = context_wabb(ce, per_ctx);
1614
1615         cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1616
1617         if (per_ctx)
1618                 setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
1619         else
1620                 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1621 }
1622
1623 static bool check_ring_start(struct intel_context *ce, bool per_ctx)
1624 {
1625         const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1626                 LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
1627                 (per_ctx ? PAGE_SIZE : 0);
1628
1629         if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1630                 return true;
1631
1632         pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1633                ctx_bb[CTX_BB_CANARY_INDEX],
1634                ce->lrc_reg_state[CTX_RING_START]);
1635
1636         return false;
1637 }
1638
1639 static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
1640 {
1641         int err;
1642
1643         err = wabb_ctx_submit_req(ce);
1644         if (err)
1645                 return err;
1646
1647         if (!check_ring_start(ce, per_ctx))
1648                 return -EINVAL;
1649
1650         return 0;
1651 }
1652
1653 static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
1654 {
1655         struct intel_context *a, *b;
1656         int err;
1657
1658         a = intel_context_create(engine);
1659         if (IS_ERR(a))
1660                 return PTR_ERR(a);
1661         err = intel_context_pin(a);
1662         if (err)
1663                 goto put_a;
1664
1665         b = intel_context_create(engine);
1666         if (IS_ERR(b)) {
1667                 err = PTR_ERR(b);
1668                 goto unpin_a;
1669         }
1670         err = intel_context_pin(b);
1671         if (err)
1672                 goto put_b;
1673
1674         /* We use the extra page already reserved in the context state */
1675         if (!a->wa_bb_page) {
1676                 GEM_BUG_ON(b->wa_bb_page);
1677                 GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1678                 goto unpin_b;
1679         }
1680
1681         /*
1682          * To prove that our per-context bb is truly per context, and that it
1683          * executes at the intended point in the context restore sequence,
1684          * make the batch store the ring start value to memory.
1685          * As ring start is restored before the indirect ctx bb runs, and as
1686          * it differs for each context, it is well suited to this purpose.
1687          */
1688         wabb_ctx_setup(a, per_ctx);
1689         wabb_ctx_setup(b, per_ctx);
1690
1691         err = wabb_ctx_check(a, per_ctx);
1692         if (err)
1693                 goto unpin_b;
1694
1695         err = wabb_ctx_check(b, per_ctx);
1696
1697 unpin_b:
1698         intel_context_unpin(b);
1699 put_b:
1700         intel_context_put(b);
1701 unpin_a:
1702         intel_context_unpin(a);
1703 put_a:
1704         intel_context_put(a);
1705
1706         return err;
1707 }
1708
1709 static int lrc_wabb_ctx(void *arg, bool per_ctx)
1710 {
1711         struct intel_gt *gt = arg;
1712         struct intel_engine_cs *engine;
1713         enum intel_engine_id id;
1714         int err = 0;
1715
1716         for_each_engine(engine, gt, id) {
1717                 intel_engine_pm_get(engine);
1718                 err = __lrc_wabb_ctx(engine, per_ctx);
1719                 intel_engine_pm_put(engine);
1720
1721                 if (igt_flush_test(gt->i915))
1722                         err = -EIO;
1723
1724                 if (err)
1725                         break;
1726         }
1727
1728         return err;
1729 }
1730
1731 static int live_lrc_indirect_ctx_bb(void *arg)
1732 {
1733         return lrc_wabb_ctx(arg, false);
1734 }
1735
1736 static int live_lrc_per_ctx_bb(void *arg)
1737 {
1738         return lrc_wabb_ctx(arg, true);
1739 }
1740
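/*
 * Reset the engine from a bottom-half-safe section: claim the per-engine
 * reset bit, keep the submission tasklet from running concurrently, and
 * only perform the reset if the request has not already been errored out
 * by another path.
 */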
1741 static void garbage_reset(struct intel_engine_cs *engine,
1742                           struct i915_request *rq)
1743 {
1744         const unsigned int bit = I915_RESET_ENGINE + engine->id;
1745         unsigned long *lock = &engine->gt->reset.flags;
1746
1747         local_bh_disable();
1748         if (!test_and_set_bit(bit, lock)) {
1749                 tasklet_disable(&engine->sched_engine->tasklet);
1750
1751                 if (!rq->fence.error)
1752                         __intel_engine_reset_bh(engine, NULL);
1753
1754                 tasklet_enable(&engine->sched_engine->tasklet);
1755                 clear_and_wake_up_bit(bit, lock);
1756         }
1757         local_bh_enable();
1758 }
1759
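/*
 * Fill the register state of the context image with random garbage and
 * submit a request on it, so that the corrupt state is loaded by the
 * hardware on the next context restore.
 */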
1760 static struct i915_request *garbage(struct intel_context *ce,
1761                                     struct rnd_state *prng)
1762 {
1763         struct i915_request *rq;
1764         int err;
1765
1766         err = intel_context_pin(ce);
1767         if (err)
1768                 return ERR_PTR(err);
1769
1770         prandom_bytes_state(prng,
1771                             ce->lrc_reg_state,
1772                             ce->engine->context_size -
1773                             LRC_STATE_OFFSET);
1774
1775         rq = intel_context_create_request(ce);
1776         if (IS_ERR(rq)) {
1777                 err = PTR_ERR(rq);
1778                 goto err_unpin;
1779         }
1780
1781         i915_request_get(rq);
1782         i915_request_add(rq);
1783         return rq;
1784
1785 err_unpin:
1786         intel_context_unpin(ce);
1787         return ERR_PTR(err);
1788 }
1789
1790 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1791 {
1792         struct intel_context *ce;
1793         struct i915_request *hang;
1794         int err = 0;
1795
1796         ce = intel_context_create(engine);
1797         if (IS_ERR(ce))
1798                 return PTR_ERR(ce);
1799
1800         hang = garbage(ce, prng);
1801         if (IS_ERR(hang)) {
1802                 err = PTR_ERR(hang);
1803                 goto err_ce;
1804         }
1805
1806         if (wait_for_submit(engine, hang, HZ / 2)) {
1807                 i915_request_put(hang);
1808                 err = -ETIME;
1809                 goto err_ce;
1810         }
1811
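        /* Ban the context so the hanging request is not replayed after reset */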
1812         intel_context_set_banned(ce);
1813         garbage_reset(engine, hang);
1814
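        /* The reset should have marked the corrupt request with an error */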
1815         intel_engine_flush_submission(engine);
1816         if (!hang->fence.error) {
1817                 i915_request_put(hang);
1818                 pr_err("%s: corrupted context was not reset\n",
1819                        engine->name);
1820                 err = -EINVAL;
1821                 goto err_ce;
1822         }
1823
1824         if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1825                 pr_err("%s: corrupted context did not recover\n",
1826                        engine->name);
1827                 i915_request_put(hang);
1828                 err = -EIO;
1829                 goto err_ce;
1830         }
1831         i915_request_put(hang);
1832
1833 err_ce:
1834         intel_context_put(ce);
1835         return err;
1836 }
1837
1838 static int live_lrc_garbage(void *arg)
1839 {
1840         struct intel_gt *gt = arg;
1841         struct intel_engine_cs *engine;
1842         enum intel_engine_id id;
1843
1844         /*
1845          * Verify that we can recover if a context's state is completely
1846          * corrupted.
1847          */
1848
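        /*
         * Deliberately corrupting context state risks wedging the GPU,
         * so only run when the broken selftests are explicitly enabled.
         */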
1849         if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1850                 return 0;
1851
1852         for_each_engine(engine, gt, id) {
1853                 I915_RND_STATE(prng);
1854                 int err = 0, i;
1855
1856                 if (!intel_has_reset_engine(engine->gt))
1857                         continue;
1858
1859                 intel_engine_pm_get(engine);
1860                 for (i = 0; i < 3; i++) {
1861                         err = __lrc_garbage(engine, &prng);
1862                         if (err)
1863                                 break;
1864                 }
1865                 intel_engine_pm_put(engine);
1866
1867                 if (igt_flush_test(gt->i915))
1868                         err = -EIO;
1869                 if (err)
1870                         return err;
1871         }
1872
1873         return 0;
1874 }
1875
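/*
 * Saturate one context with requests while the cumulative runtime is
 * sampled from the per-process HWSP. Any underflow (a sample smaller
 * than the previous one) is tracked by the driver in ce->stats.runtime
 * and fails the test.
 */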
1876 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1877 {
1878         struct intel_context *ce;
1879         struct i915_request *rq;
1880         IGT_TIMEOUT(end_time);
1881         int err;
1882
1883         ce = intel_context_create(engine);
1884         if (IS_ERR(ce))
1885                 return PTR_ERR(ce);
1886
1887         ce->stats.runtime.num_underflow = 0;
1888         ce->stats.runtime.max_underflow = 0;
1889
1890         do {
1891                 unsigned int loop = 1024;
1892
1893                 while (loop) {
1894                         rq = intel_context_create_request(ce);
1895                         if (IS_ERR(rq)) {
1896                                 err = PTR_ERR(rq);
1897                                 goto err_rq;
1898                         }
1899
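                        /*
                         * Keep a reference on the final request of each
                         * batch of 1024 so there is something to wait on;
                         * it is dropped each pass unless we time out.
                         */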
1900                         if (--loop == 0)
1901                                 i915_request_get(rq);
1902
1903                         i915_request_add(rq);
1904                 }
1905
1906                 if (__igt_timeout(end_time, NULL))
1907                         break;
1908
1909                 i915_request_put(rq);
1910         } while (1);
1911
1912         err = i915_request_wait(rq, 0, HZ / 5);
1913         if (err < 0) {
1914                 pr_err("%s: request not completed!\n", engine->name);
1915                 goto err_wait;
1916         }
1917
1918         igt_flush_test(engine->i915);
1919
1920         pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1921                 engine->name,
1922                 intel_context_get_total_runtime_ns(ce),
1923                 intel_context_get_avg_runtime_ns(ce));
1924
1925         err = 0;
1926         if (ce->stats.runtime.num_underflow) {
1927                 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1928                        engine->name,
1929                        ce->stats.runtime.num_underflow,
1930                        ce->stats.runtime.max_underflow);
1931                 GEM_TRACE_DUMP();
1932                 err = -EOVERFLOW;
1933         }
1934
1935 err_wait:
1936         i915_request_put(rq);
1937 err_rq:
1938         intel_context_put(ce);
1939         return err;
1940 }
1941
1942 static int live_pphwsp_runtime(void *arg)
1943 {
1944         struct intel_gt *gt = arg;
1945         struct intel_engine_cs *engine;
1946         enum intel_engine_id id;
1947         int err = 0;
1948
1949         /*
1950          * Check that the cumulative context runtime, as stored in pphwsp[16],
1951          * is monotonically increasing.
1952          */
1953
1954         for_each_engine(engine, gt, id) {
1955                 err = __live_pphwsp_runtime(engine);
1956                 if (err)
1957                         break;
1958         }
1959
1960         if (igt_flush_test(gt->i915))
1961                 err = -EIO;
1962
1963         return err;
1964 }
1965
1966 int intel_lrc_live_selftests(struct drm_i915_private *i915)
1967 {
1968         static const struct i915_subtest tests[] = {
1969                 SUBTEST(live_lrc_layout),
1970                 SUBTEST(live_lrc_fixed),
1971                 SUBTEST(live_lrc_state),
1972                 SUBTEST(live_lrc_gpr),
1973                 SUBTEST(live_lrc_isolation),
1974                 SUBTEST(live_lrc_timestamp),
1975                 SUBTEST(live_lrc_garbage),
1976                 SUBTEST(live_pphwsp_runtime),
1977                 SUBTEST(live_lrc_indirect_ctx_bb),
1978                 SUBTEST(live_lrc_per_ctx_bb),
1979         };
1980
1981         if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1982                 return 0;
1983
1984         return intel_gt_live_subtests(tests, to_gt(i915));
1985 }