]> Git Repo - linux.git/blob - drivers/gpu/drm/i915/gt/selftest_timeline.c
Linux 6.14-rc3
[linux.git] / drivers / gpu / drm / i915 / gt / selftest_timeline.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2017-2018 Intel Corporation
4  */
5
6 #include <linux/prime_numbers.h>
7 #include <linux/string_helpers.h>
8
9 #include "intel_context.h"
10 #include "intel_engine_heartbeat.h"
11 #include "intel_engine_pm.h"
12 #include "intel_engine_regs.h"
13 #include "intel_gpu_commands.h"
14 #include "intel_gt.h"
15 #include "intel_gt_requests.h"
16 #include "intel_ring.h"
17 #include "selftest_engine_heartbeat.h"
18
19 #include "../selftests/i915_random.h"
20 #include "../i915_selftest.h"
21
22 #include "selftests/igt_flush_test.h"
23 #include "selftests/lib_sw_fence.h"
24 #include "selftests/mock_gem_device.h"
25 #include "selftests/mock_timeline.h"
26
27 static struct page *hwsp_page(struct intel_timeline *tl)
28 {
29         struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
30
31         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
32         return sg_page(obj->mm.pages->sgl);
33 }
34
35 static unsigned long hwsp_cacheline(struct intel_timeline *tl)
36 {
37         unsigned long address = (unsigned long)page_address(hwsp_page(tl));
38
39         return (address + offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES;
40 }
41
42 static int selftest_tl_pin(struct intel_timeline *tl)
43 {
44         struct i915_gem_ww_ctx ww;
45         int err;
46
47         i915_gem_ww_ctx_init(&ww, false);
48 retry:
49         err = i915_gem_object_lock(tl->hwsp_ggtt->obj, &ww);
50         if (!err)
51                 err = intel_timeline_pin(tl, &ww);
52
53         if (err == -EDEADLK) {
54                 err = i915_gem_ww_ctx_backoff(&ww);
55                 if (!err)
56                         goto retry;
57         }
58         i915_gem_ww_ctx_fini(&ww);
59         return err;
60 }
61
62 /* Only half of seqno's are usable, see __intel_timeline_get_seqno() */
63 #define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES / 2)
64
65 struct mock_hwsp_freelist {
66         struct intel_gt *gt;
67         struct radix_tree_root cachelines;
68         struct intel_timeline **history;
69         unsigned long count, max;
70         struct rnd_state prng;
71 };
72
73 enum {
74         SHUFFLE = BIT(0),
75 };
76
77 static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
78                                unsigned int idx,
79                                struct intel_timeline *tl)
80 {
81         tl = xchg(&state->history[idx], tl);
82         if (tl) {
83                 radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
84                 intel_timeline_unpin(tl);
85                 intel_timeline_put(tl);
86         }
87 }
88
89 static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
90                                 unsigned int count,
91                                 unsigned int flags)
92 {
93         struct intel_timeline *tl;
94         unsigned int idx;
95
96         while (count--) {
97                 unsigned long cacheline;
98                 int err;
99
100                 tl = intel_timeline_create(state->gt);
101                 if (IS_ERR(tl))
102                         return PTR_ERR(tl);
103
104                 err = selftest_tl_pin(tl);
105                 if (err) {
106                         intel_timeline_put(tl);
107                         return err;
108                 }
109
110                 cacheline = hwsp_cacheline(tl);
111                 err = radix_tree_insert(&state->cachelines, cacheline, tl);
112                 if (err) {
113                         if (err == -EEXIST) {
114                                 pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
115                                        cacheline);
116                         }
117                         intel_timeline_unpin(tl);
118                         intel_timeline_put(tl);
119                         return err;
120                 }
121
122                 idx = state->count++ % state->max;
123                 __mock_hwsp_record(state, idx, tl);
124         }
125
126         if (flags & SHUFFLE)
127                 i915_prandom_shuffle(state->history,
128                                      sizeof(*state->history),
129                                      min(state->count, state->max),
130                                      &state->prng);
131
132         count = i915_prandom_u32_max_state(min(state->count, state->max),
133                                            &state->prng);
134         while (count--) {
135                 idx = --state->count % state->max;
136                 __mock_hwsp_record(state, idx, NULL);
137         }
138
139         return 0;
140 }
141
142 static int mock_hwsp_freelist(void *arg)
143 {
144         struct mock_hwsp_freelist state;
145         struct drm_i915_private *i915;
146         const struct {
147                 const char *name;
148                 unsigned int flags;
149         } phases[] = {
150                 { "linear", 0 },
151                 { "shuffled", SHUFFLE },
152                 { },
153         }, *p;
154         unsigned int na;
155         int err = 0;
156
157         i915 = mock_gem_device();
158         if (!i915)
159                 return -ENOMEM;
160
161         INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
162         state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
163
164         state.gt = to_gt(i915);
165
166         /*
167          * Create a bunch of timelines and check that their HWSP do not overlap.
168          * Free some, and try again.
169          */
170
171         state.max = PAGE_SIZE / sizeof(*state.history);
172         state.count = 0;
173         state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
174         if (!state.history) {
175                 err = -ENOMEM;
176                 goto err_put;
177         }
178
179         for (p = phases; p->name; p++) {
180                 pr_debug("%s(%s)\n", __func__, p->name);
181                 for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
182                         err = __mock_hwsp_timeline(&state, na, p->flags);
183                         if (err)
184                                 goto out;
185                 }
186         }
187
188 out:
189         for (na = 0; na < state.max; na++)
190                 __mock_hwsp_record(&state, na, NULL);
191         kfree(state.history);
192 err_put:
193         mock_destroy_device(i915);
194         return err;
195 }
196
197 struct __igt_sync {
198         const char *name;
199         u32 seqno;
200         bool expected;
201         bool set;
202 };
203
204 static int __igt_sync(struct intel_timeline *tl,
205                       u64 ctx,
206                       const struct __igt_sync *p,
207                       const char *name)
208 {
209         int ret;
210
211         if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
212                 pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
213                        name, p->name, ctx, p->seqno, str_yes_no(p->expected));
214                 return -EINVAL;
215         }
216
217         if (p->set) {
218                 ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
219                 if (ret)
220                         return ret;
221         }
222
223         return 0;
224 }
225
226 static int igt_sync(void *arg)
227 {
228         const struct __igt_sync pass[] = {
229                 { "unset", 0, false, false },
230                 { "new", 0, false, true },
231                 { "0a", 0, true, true },
232                 { "1a", 1, false, true },
233                 { "1b", 1, true, true },
234                 { "0b", 0, true, false },
235                 { "2a", 2, false, true },
236                 { "4", 4, false, true },
237                 { "INT_MAX", INT_MAX, false, true },
238                 { "INT_MAX-1", INT_MAX-1, true, false },
239                 { "INT_MAX+1", (u32)INT_MAX+1, false, true },
240                 { "INT_MAX", INT_MAX, true, false },
241                 { "UINT_MAX", UINT_MAX, false, true },
242                 { "wrap", 0, false, true },
243                 { "unwrap", UINT_MAX, true, false },
244                 {},
245         }, *p;
246         struct intel_timeline tl;
247         int order, offset;
248         int ret = -ENODEV;
249
250         mock_timeline_init(&tl, 0);
251         for (p = pass; p->name; p++) {
252                 for (order = 1; order < 64; order++) {
253                         for (offset = -1; offset <= (order > 1); offset++) {
254                                 u64 ctx = BIT_ULL(order) + offset;
255
256                                 ret = __igt_sync(&tl, ctx, p, "1");
257                                 if (ret)
258                                         goto out;
259                         }
260                 }
261         }
262         mock_timeline_fini(&tl);
263
264         mock_timeline_init(&tl, 0);
265         for (order = 1; order < 64; order++) {
266                 for (offset = -1; offset <= (order > 1); offset++) {
267                         u64 ctx = BIT_ULL(order) + offset;
268
269                         for (p = pass; p->name; p++) {
270                                 ret = __igt_sync(&tl, ctx, p, "2");
271                                 if (ret)
272                                         goto out;
273                         }
274                 }
275         }
276
277 out:
278         mock_timeline_fini(&tl);
279         return ret;
280 }
281
282 static unsigned int random_engine(struct rnd_state *rnd)
283 {
284         return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
285 }
286
287 static int bench_sync(void *arg)
288 {
289         struct rnd_state prng;
290         struct intel_timeline tl;
291         unsigned long end_time, count;
292         u64 prng32_1M;
293         ktime_t kt;
294         int order, last_order;
295
296         mock_timeline_init(&tl, 0);
297
298         /* Lookups from cache are very fast and so the random number generation
299          * and the loop itself becomes a significant factor in the per-iteration
300          * timings. We try to compensate the results by measuring the overhead
301          * of the prng and subtract it from the reported results.
302          */
303         prandom_seed_state(&prng, i915_selftest.random_seed);
304         count = 0;
305         kt = ktime_get();
306         end_time = jiffies + HZ/10;
307         do {
308                 u32 x;
309
310                 /* Make sure the compiler doesn't optimise away the prng call */
311                 WRITE_ONCE(x, prandom_u32_state(&prng));
312
313                 count++;
314         } while (!time_after(jiffies, end_time));
315         kt = ktime_sub(ktime_get(), kt);
316         pr_debug("%s: %lu random evaluations, %lluns/prng\n",
317                  __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
318         prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
319
320         /* Benchmark (only) setting random context ids */
321         prandom_seed_state(&prng, i915_selftest.random_seed);
322         count = 0;
323         kt = ktime_get();
324         end_time = jiffies + HZ/10;
325         do {
326                 u64 id = i915_prandom_u64_state(&prng);
327
328                 __intel_timeline_sync_set(&tl, id, 0);
329                 count++;
330         } while (!time_after(jiffies, end_time));
331         kt = ktime_sub(ktime_get(), kt);
332         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
333         pr_info("%s: %lu random insertions, %lluns/insert\n",
334                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
335
336         /* Benchmark looking up the exact same context ids as we just set */
337         prandom_seed_state(&prng, i915_selftest.random_seed);
338         end_time = count;
339         kt = ktime_get();
340         while (end_time--) {
341                 u64 id = i915_prandom_u64_state(&prng);
342
343                 if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
344                         mock_timeline_fini(&tl);
345                         pr_err("Lookup of %llu failed\n", id);
346                         return -EINVAL;
347                 }
348         }
349         kt = ktime_sub(ktime_get(), kt);
350         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
351         pr_info("%s: %lu random lookups, %lluns/lookup\n",
352                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
353
354         mock_timeline_fini(&tl);
355         cond_resched();
356
357         mock_timeline_init(&tl, 0);
358
359         /* Benchmark setting the first N (in order) contexts */
360         count = 0;
361         kt = ktime_get();
362         end_time = jiffies + HZ/10;
363         do {
364                 __intel_timeline_sync_set(&tl, count++, 0);
365         } while (!time_after(jiffies, end_time));
366         kt = ktime_sub(ktime_get(), kt);
367         pr_info("%s: %lu in-order insertions, %lluns/insert\n",
368                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
369
370         /* Benchmark looking up the exact same context ids as we just set */
371         end_time = count;
372         kt = ktime_get();
373         while (end_time--) {
374                 if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
375                         pr_err("Lookup of %lu failed\n", end_time);
376                         mock_timeline_fini(&tl);
377                         return -EINVAL;
378                 }
379         }
380         kt = ktime_sub(ktime_get(), kt);
381         pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
382                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
383
384         mock_timeline_fini(&tl);
385         cond_resched();
386
387         mock_timeline_init(&tl, 0);
388
389         /* Benchmark searching for a random context id and maybe changing it */
390         prandom_seed_state(&prng, i915_selftest.random_seed);
391         count = 0;
392         kt = ktime_get();
393         end_time = jiffies + HZ/10;
394         do {
395                 u32 id = random_engine(&prng);
396                 u32 seqno = prandom_u32_state(&prng);
397
398                 if (!__intel_timeline_sync_is_later(&tl, id, seqno))
399                         __intel_timeline_sync_set(&tl, id, seqno);
400
401                 count++;
402         } while (!time_after(jiffies, end_time));
403         kt = ktime_sub(ktime_get(), kt);
404         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
405         pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
406                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
407         mock_timeline_fini(&tl);
408         cond_resched();
409
410         /* Benchmark searching for a known context id and changing the seqno */
411         for (last_order = 1, order = 1; order < 32;
412              ({ int tmp = last_order; last_order = order; order += tmp; })) {
413                 unsigned int mask = BIT(order) - 1;
414
415                 mock_timeline_init(&tl, 0);
416
417                 count = 0;
418                 kt = ktime_get();
419                 end_time = jiffies + HZ/10;
420                 do {
421                         /* Without assuming too many details of the underlying
422                          * implementation, try to identify its phase-changes
423                          * (if any)!
424                          */
425                         u64 id = (u64)(count & mask) << order;
426
427                         __intel_timeline_sync_is_later(&tl, id, 0);
428                         __intel_timeline_sync_set(&tl, id, 0);
429
430                         count++;
431                 } while (!time_after(jiffies, end_time));
432                 kt = ktime_sub(ktime_get(), kt);
433                 pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
434                         __func__, count, order,
435                         (long long)div64_ul(ktime_to_ns(kt), count));
436                 mock_timeline_fini(&tl);
437                 cond_resched();
438         }
439
440         return 0;
441 }
442
443 int intel_timeline_mock_selftests(void)
444 {
445         static const struct i915_subtest tests[] = {
446                 SUBTEST(mock_hwsp_freelist),
447                 SUBTEST(igt_sync),
448                 SUBTEST(bench_sync),
449         };
450
451         return i915_subtests(tests, NULL);
452 }
453
454 static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
455 {
456         u32 *cs;
457
458         cs = intel_ring_begin(rq, 4);
459         if (IS_ERR(cs))
460                 return PTR_ERR(cs);
461
462         if (GRAPHICS_VER(rq->i915) >= 8) {
463                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
464                 *cs++ = addr;
465                 *cs++ = 0;
466                 *cs++ = value;
467         } else if (GRAPHICS_VER(rq->i915) >= 4) {
468                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
469                 *cs++ = 0;
470                 *cs++ = addr;
471                 *cs++ = value;
472         } else {
473                 *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
474                 *cs++ = addr;
475                 *cs++ = value;
476                 *cs++ = MI_NOOP;
477         }
478
479         intel_ring_advance(rq, cs);
480
481         return 0;
482 }
483
484 static struct i915_request *
485 checked_tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
486 {
487         struct i915_request *rq;
488         int err;
489
490         err = selftest_tl_pin(tl);
491         if (err) {
492                 rq = ERR_PTR(err);
493                 goto out;
494         }
495
496         if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
497                 pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
498                        *tl->hwsp_seqno, tl->seqno);
499                 intel_timeline_unpin(tl);
500                 return ERR_PTR(-EINVAL);
501         }
502
503         rq = intel_engine_create_kernel_request(engine);
504         if (IS_ERR(rq))
505                 goto out_unpin;
506
507         i915_request_get(rq);
508
509         err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
510         i915_request_add(rq);
511         if (err) {
512                 i915_request_put(rq);
513                 rq = ERR_PTR(err);
514         }
515
516 out_unpin:
517         intel_timeline_unpin(tl);
518 out:
519         if (IS_ERR(rq))
520                 pr_err("Failed to write to timeline!\n");
521         return rq;
522 }
523
524 static int live_hwsp_engine(void *arg)
525 {
526 #define NUM_TIMELINES 4096
527         struct intel_gt *gt = arg;
528         struct intel_timeline **timelines;
529         struct intel_engine_cs *engine;
530         enum intel_engine_id id;
531         unsigned long count, n;
532         int err = 0;
533
534         /*
535          * Create a bunch of timelines and check we can write
536          * independently to each of their breadcrumb slots.
537          */
538
539         timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
540                                    sizeof(*timelines),
541                                    GFP_KERNEL);
542         if (!timelines)
543                 return -ENOMEM;
544
545         count = 0;
546         for_each_engine(engine, gt, id) {
547                 if (!intel_engine_can_store_dword(engine))
548                         continue;
549
550                 intel_engine_pm_get(engine);
551
552                 for (n = 0; n < NUM_TIMELINES; n++) {
553                         struct intel_timeline *tl;
554                         struct i915_request *rq;
555
556                         tl = intel_timeline_create(gt);
557                         if (IS_ERR(tl)) {
558                                 err = PTR_ERR(tl);
559                                 break;
560                         }
561
562                         rq = checked_tl_write(tl, engine, count);
563                         if (IS_ERR(rq)) {
564                                 intel_timeline_put(tl);
565                                 err = PTR_ERR(rq);
566                                 break;
567                         }
568
569                         timelines[count++] = tl;
570                         i915_request_put(rq);
571                 }
572
573                 intel_engine_pm_put(engine);
574                 if (err)
575                         break;
576         }
577
578         if (igt_flush_test(gt->i915))
579                 err = -EIO;
580
581         for (n = 0; n < count; n++) {
582                 struct intel_timeline *tl = timelines[n];
583
584                 if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
585                         GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
586                                       n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
587                         GEM_TRACE_DUMP();
588                         err = -EINVAL;
589                 }
590                 intel_timeline_put(tl);
591         }
592
593         kvfree(timelines);
594         return err;
595 #undef NUM_TIMELINES
596 }
597
598 static int live_hwsp_alternate(void *arg)
599 {
600 #define NUM_TIMELINES 4096
601         struct intel_gt *gt = arg;
602         struct intel_timeline **timelines;
603         struct intel_engine_cs *engine;
604         enum intel_engine_id id;
605         unsigned long count, n;
606         int err = 0;
607
608         /*
609          * Create a bunch of timelines and check we can write
610          * independently to each of their breadcrumb slots with adjacent
611          * engines.
612          */
613
614         timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
615                                    sizeof(*timelines),
616                                    GFP_KERNEL);
617         if (!timelines)
618                 return -ENOMEM;
619
620         count = 0;
621         for (n = 0; n < NUM_TIMELINES; n++) {
622                 for_each_engine(engine, gt, id) {
623                         struct intel_timeline *tl;
624                         struct i915_request *rq;
625
626                         if (!intel_engine_can_store_dword(engine))
627                                 continue;
628
629                         tl = intel_timeline_create(gt);
630                         if (IS_ERR(tl)) {
631                                 err = PTR_ERR(tl);
632                                 goto out;
633                         }
634
635                         intel_engine_pm_get(engine);
636                         rq = checked_tl_write(tl, engine, count);
637                         intel_engine_pm_put(engine);
638                         if (IS_ERR(rq)) {
639                                 intel_timeline_put(tl);
640                                 err = PTR_ERR(rq);
641                                 goto out;
642                         }
643
644                         timelines[count++] = tl;
645                         i915_request_put(rq);
646                 }
647         }
648
649 out:
650         if (igt_flush_test(gt->i915))
651                 err = -EIO;
652
653         for (n = 0; n < count; n++) {
654                 struct intel_timeline *tl = timelines[n];
655
656                 if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
657                         GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
658                                       n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
659                         GEM_TRACE_DUMP();
660                         err = -EINVAL;
661                 }
662                 intel_timeline_put(tl);
663         }
664
665         kvfree(timelines);
666         return err;
667 #undef NUM_TIMELINES
668 }
669
670 static int live_hwsp_wrap(void *arg)
671 {
672         struct intel_gt *gt = arg;
673         struct intel_engine_cs *engine;
674         struct intel_timeline *tl;
675         enum intel_engine_id id;
676         int err = 0;
677
678         /*
679          * Across a seqno wrap, we need to keep the old cacheline alive for
680          * foreign GPU references.
681          */
682
683         tl = intel_timeline_create(gt);
684         if (IS_ERR(tl))
685                 return PTR_ERR(tl);
686
687         if (!tl->has_initial_breadcrumb)
688                 goto out_free;
689
690         err = selftest_tl_pin(tl);
691         if (err)
692                 goto out_free;
693
694         for_each_engine(engine, gt, id) {
695                 const u32 *hwsp_seqno[2];
696                 struct i915_request *rq;
697                 u32 seqno[2];
698
699                 if (!intel_engine_can_store_dword(engine))
700                         continue;
701
702                 rq = intel_engine_create_kernel_request(engine);
703                 if (IS_ERR(rq)) {
704                         err = PTR_ERR(rq);
705                         goto out;
706                 }
707
708                 tl->seqno = -4u;
709
710                 mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
711                 err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
712                 mutex_unlock(&tl->mutex);
713                 if (err) {
714                         i915_request_add(rq);
715                         goto out;
716                 }
717                 pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
718                          seqno[0], tl->hwsp_offset);
719
720                 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
721                 if (err) {
722                         i915_request_add(rq);
723                         goto out;
724                 }
725                 hwsp_seqno[0] = tl->hwsp_seqno;
726
727                 mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
728                 err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
729                 mutex_unlock(&tl->mutex);
730                 if (err) {
731                         i915_request_add(rq);
732                         goto out;
733                 }
734                 pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
735                          seqno[1], tl->hwsp_offset);
736
737                 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
738                 if (err) {
739                         i915_request_add(rq);
740                         goto out;
741                 }
742                 hwsp_seqno[1] = tl->hwsp_seqno;
743
744                 /* With wrap should come a new hwsp */
745                 GEM_BUG_ON(seqno[1] >= seqno[0]);
746                 GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
747
748                 i915_request_add(rq);
749
750                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
751                         pr_err("Wait for timeline writes timed out!\n");
752                         err = -EIO;
753                         goto out;
754                 }
755
756                 if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
757                     READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
758                         pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
759                                *hwsp_seqno[0], *hwsp_seqno[1],
760                                seqno[0], seqno[1]);
761                         err = -EINVAL;
762                         goto out;
763                 }
764
765                 intel_gt_retire_requests(gt); /* recycle HWSP */
766         }
767
768 out:
769         if (igt_flush_test(gt->i915))
770                 err = -EIO;
771
772         intel_timeline_unpin(tl);
773 out_free:
774         intel_timeline_put(tl);
775         return err;
776 }
777
778 static int emit_read_hwsp(struct i915_request *rq,
779                           u32 seqno, u32 hwsp,
780                           u32 *addr)
781 {
782         const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
783         u32 *cs;
784
785         cs = intel_ring_begin(rq, 12);
786         if (IS_ERR(cs))
787                 return PTR_ERR(cs);
788
789         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
790         *cs++ = *addr;
791         *cs++ = 0;
792         *cs++ = seqno;
793         *addr += 4;
794
795         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
796         *cs++ = gpr;
797         *cs++ = hwsp;
798         *cs++ = 0;
799
800         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
801         *cs++ = gpr;
802         *cs++ = *addr;
803         *cs++ = 0;
804         *addr += 4;
805
806         intel_ring_advance(rq, cs);
807
808         return 0;
809 }
810
811 struct hwsp_watcher {
812         struct i915_vma *vma;
813         struct i915_request *rq;
814         u32 addr;
815         u32 *map;
816 };
817
818 static bool cmp_lt(u32 a, u32 b)
819 {
820         return a < b;
821 }
822
823 static bool cmp_gte(u32 a, u32 b)
824 {
825         return a >= b;
826 }
827
828 static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
829                          struct intel_timeline *tl)
830 {
831         struct drm_i915_gem_object *obj;
832         struct i915_vma *vma;
833
834         obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
835         if (IS_ERR(obj))
836                 return PTR_ERR(obj);
837
838         /* keep the same cache settings as timeline */
839         i915_gem_object_set_pat_index(obj, tl->hwsp_ggtt->obj->pat_index);
840         w->map = i915_gem_object_pin_map_unlocked(obj,
841                                                   page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
842         if (IS_ERR(w->map)) {
843                 i915_gem_object_put(obj);
844                 return PTR_ERR(w->map);
845         }
846
847         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
848         if (IS_ERR(vma)) {
849                 i915_gem_object_put(obj);
850                 return PTR_ERR(vma);
851         }
852
853         w->vma = vma;
854         w->addr = i915_ggtt_offset(vma);
855         return 0;
856 }
857
858 static void switch_tl_lock(struct i915_request *from, struct i915_request *to)
859 {
860         /* some light mutex juggling required; think co-routines */
861
862         if (from) {
863                 lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie);
864                 mutex_unlock(&from->context->timeline->mutex);
865         }
866
867         if (to) {
868                 mutex_lock(&to->context->timeline->mutex);
869                 to->cookie = lockdep_pin_lock(&to->context->timeline->mutex);
870         }
871 }
872
873 static int create_watcher(struct hwsp_watcher *w,
874                           struct intel_engine_cs *engine,
875                           int ringsz)
876 {
877         struct intel_context *ce;
878
879         ce = intel_context_create(engine);
880         if (IS_ERR(ce))
881                 return PTR_ERR(ce);
882
883         ce->ring_size = ringsz;
884         w->rq = intel_context_create_request(ce);
885         intel_context_put(ce);
886         if (IS_ERR(w->rq))
887                 return PTR_ERR(w->rq);
888
889         w->addr = i915_ggtt_offset(w->vma);
890
891         switch_tl_lock(w->rq, NULL);
892
893         return 0;
894 }
895
896 static int check_watcher(struct hwsp_watcher *w, const char *name,
897                          bool (*op)(u32 hwsp, u32 seqno))
898 {
899         struct i915_request *rq = fetch_and_zero(&w->rq);
900         u32 offset, end;
901         int err;
902
903         GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);
904
905         i915_request_get(rq);
906         switch_tl_lock(NULL, rq);
907         i915_request_add(rq);
908
909         if (i915_request_wait(rq, 0, HZ) < 0) {
910                 err = -ETIME;
911                 goto out;
912         }
913
914         err = 0;
915         offset = 0;
916         end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
917         while (offset < end) {
918                 if (!op(w->map[offset + 1], w->map[offset])) {
919                         pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
920                                name, w->map[offset + 1], w->map[offset]);
921                         err = -EINVAL;
922                 }
923
924                 offset += 2;
925         }
926
927 out:
928         i915_request_put(rq);
929         return err;
930 }
931
932 static void cleanup_watcher(struct hwsp_watcher *w)
933 {
934         if (w->rq) {
935                 switch_tl_lock(NULL, w->rq);
936
937                 i915_request_add(w->rq);
938         }
939
940         i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
941 }
942
943 static bool retire_requests(struct intel_timeline *tl)
944 {
945         struct i915_request *rq, *rn;
946
947         mutex_lock(&tl->mutex);
948         list_for_each_entry_safe(rq, rn, &tl->requests, link)
949                 if (!i915_request_retire(rq))
950                         break;
951         mutex_unlock(&tl->mutex);
952
953         return !i915_active_fence_isset(&tl->last_request);
954 }
955
956 static struct i915_request *wrap_timeline(struct i915_request *rq)
957 {
958         struct intel_context *ce = rq->context;
959         struct intel_timeline *tl = ce->timeline;
960         u32 seqno = rq->fence.seqno;
961
962         while (tl->seqno >= seqno) { /* Cause a wrap */
963                 i915_request_put(rq);
964                 rq = intel_context_create_request(ce);
965                 if (IS_ERR(rq))
966                         return rq;
967
968                 i915_request_get(rq);
969                 i915_request_add(rq);
970         }
971
972         i915_request_put(rq);
973         rq = i915_request_create(ce);
974         if (IS_ERR(rq))
975                 return rq;
976
977         i915_request_get(rq);
978         i915_request_add(rq);
979
980         return rq;
981 }
982
983 static int live_hwsp_read(void *arg)
984 {
985         struct intel_gt *gt = arg;
986         struct hwsp_watcher watcher[2] = {};
987         struct intel_engine_cs *engine;
988         struct intel_timeline *tl;
989         enum intel_engine_id id;
990         int err = 0;
991         int i;
992
993         /*
994          * If we take a reference to the HWSP for reading on the GPU, that
995          * read may be arbitrarily delayed (either by foreign fence or
996          * priority saturation) and a wrap can happen within 30 minutes.
997          * When the GPU read is finally submitted it should be correct,
998          * even across multiple wraps.
999          */
1000
1001         if (GRAPHICS_VER(gt->i915) < 8) /* CS convenience [SRM/LRM] */
1002                 return 0;
1003
1004         tl = intel_timeline_create(gt);
1005         if (IS_ERR(tl))
1006                 return PTR_ERR(tl);
1007
1008         if (!tl->has_initial_breadcrumb)
1009                 goto out_free;
1010
1011         selftest_tl_pin(tl);
1012
1013         for (i = 0; i < ARRAY_SIZE(watcher); i++) {
1014                 err = setup_watcher(&watcher[i], gt, tl);
1015                 if (err)
1016                         goto out;
1017         }
1018
1019         for_each_engine(engine, gt, id) {
1020                 struct intel_context *ce;
1021                 unsigned long count = 0;
1022                 IGT_TIMEOUT(end_time);
1023
1024                 /* Create a request we can use for remote reading of the HWSP */
1025                 err = create_watcher(&watcher[1], engine, SZ_512K);
1026                 if (err)
1027                         goto out;
1028
1029                 do {
1030                         struct i915_sw_fence *submit;
1031                         struct i915_request *rq;
1032                         u32 hwsp, dummy;
1033
1034                         submit = heap_fence_create(GFP_KERNEL);
1035                         if (!submit) {
1036                                 err = -ENOMEM;
1037                                 goto out;
1038                         }
1039
1040                         err = create_watcher(&watcher[0], engine, SZ_4K);
1041                         if (err)
1042                                 goto out;
1043
1044                         ce = intel_context_create(engine);
1045                         if (IS_ERR(ce)) {
1046                                 err = PTR_ERR(ce);
1047                                 goto out;
1048                         }
1049
1050                         ce->timeline = intel_timeline_get(tl);
1051
1052                         /* Ensure timeline is mapped, done during first pin */
1053                         err = intel_context_pin(ce);
1054                         if (err) {
1055                                 intel_context_put(ce);
1056                                 goto out;
1057                         }
1058
1059                         /*
1060                          * Start at a new wrap, and set seqno right before another wrap,
1061                          * saving 30 minutes of nops
1062                          */
1063                         tl->seqno = -12u + 2 * (count & 3);
1064                         __intel_timeline_get_seqno(tl, &dummy);
1065
1066                         rq = i915_request_create(ce);
1067                         if (IS_ERR(rq)) {
1068                                 err = PTR_ERR(rq);
1069                                 intel_context_unpin(ce);
1070                                 intel_context_put(ce);
1071                                 goto out;
1072                         }
1073
1074                         err = i915_sw_fence_await_dma_fence(&rq->submit,
1075                                                             &watcher[0].rq->fence, 0,
1076                                                             GFP_KERNEL);
1077                         if (err < 0) {
1078                                 i915_request_add(rq);
1079                                 intel_context_unpin(ce);
1080                                 intel_context_put(ce);
1081                                 goto out;
1082                         }
1083
1084                         switch_tl_lock(rq, watcher[0].rq);
1085                         err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
1086                         if (err == 0)
1087                                 err = emit_read_hwsp(watcher[0].rq, /* before */
1088                                                      rq->fence.seqno, hwsp,
1089                                                      &watcher[0].addr);
1090                         switch_tl_lock(watcher[0].rq, rq);
1091                         if (err) {
1092                                 i915_request_add(rq);
1093                                 intel_context_unpin(ce);
1094                                 intel_context_put(ce);
1095                                 goto out;
1096                         }
1097
1098                         switch_tl_lock(rq, watcher[1].rq);
1099                         err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
1100                         if (err == 0)
1101                                 err = emit_read_hwsp(watcher[1].rq, /* after */
1102                                                      rq->fence.seqno, hwsp,
1103                                                      &watcher[1].addr);
1104                         switch_tl_lock(watcher[1].rq, rq);
1105                         if (err) {
1106                                 i915_request_add(rq);
1107                                 intel_context_unpin(ce);
1108                                 intel_context_put(ce);
1109                                 goto out;
1110                         }
1111
1112                         i915_request_get(rq);
1113                         i915_request_add(rq);
1114
1115                         rq = wrap_timeline(rq);
1116                         intel_context_unpin(ce);
1117                         intel_context_put(ce);
1118                         if (IS_ERR(rq)) {
1119                                 err = PTR_ERR(rq);
1120                                 goto out;
1121                         }
1122
1123                         err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
1124                                                             &rq->fence, 0,
1125                                                             GFP_KERNEL);
1126                         if (err < 0) {
1127                                 i915_request_put(rq);
1128                                 goto out;
1129                         }
1130
1131                         err = check_watcher(&watcher[0], "before", cmp_lt);
1132                         i915_sw_fence_commit(submit);
1133                         heap_fence_put(submit);
1134                         if (err) {
1135                                 i915_request_put(rq);
1136                                 goto out;
1137                         }
1138                         count++;
1139
1140                         /* Flush the timeline before manually wrapping again */
1141                         if (i915_request_wait(rq,
1142                                               I915_WAIT_INTERRUPTIBLE,
1143                                               HZ) < 0) {
1144                                 err = -ETIME;
1145                                 i915_request_put(rq);
1146                                 goto out;
1147                         }
1148                         retire_requests(tl);
1149                         i915_request_put(rq);
1150
1151                         /* Single requests are limited to half a ring at most */
1152                         if (8 * watcher[1].rq->ring->emit >
1153                             3 * watcher[1].rq->ring->size)
1154                                 break;
1155
1156                 } while (!__igt_timeout(end_time, NULL) &&
1157                          count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - 1) / 2);
1158
1159                 pr_info("%s: simulated %lu wraps\n", engine->name, count);
1160                 err = check_watcher(&watcher[1], "after", cmp_gte);
1161                 if (err)
1162                         goto out;
1163         }
1164
1165 out:
1166         for (i = 0; i < ARRAY_SIZE(watcher); i++)
1167                 cleanup_watcher(&watcher[i]);
1168
1169         intel_timeline_unpin(tl);
1170
1171         if (igt_flush_test(gt->i915))
1172                 err = -EIO;
1173
1174 out_free:
1175         intel_timeline_put(tl);
1176         return err;
1177 }
1178
1179 static int live_hwsp_rollover_kernel(void *arg)
1180 {
1181         struct intel_gt *gt = arg;
1182         struct intel_engine_cs *engine;
1183         enum intel_engine_id id;
1184         int err = 0;
1185
1186         /*
1187          * Run the host for long enough, and even the kernel context will
1188          * see a seqno rollover.
1189          */
1190
1191         for_each_engine(engine, gt, id) {
1192                 struct intel_context *ce = engine->kernel_context;
1193                 struct intel_timeline *tl = ce->timeline;
1194                 struct i915_request *rq[3] = {};
1195                 int i;
1196
1197                 st_engine_heartbeat_disable(engine);
1198                 if (intel_gt_wait_for_idle(gt, HZ / 2)) {
1199                         err = -EIO;
1200                         goto out;
1201                 }
1202
1203                 GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
1204                 tl->seqno = -2u;
1205                 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
1206
1207                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
1208                         struct i915_request *this;
1209
1210                         this = i915_request_create(ce);
1211                         if (IS_ERR(this)) {
1212                                 err = PTR_ERR(this);
1213                                 goto out;
1214                         }
1215
1216                         pr_debug("%s: create fence.seqnp:%d\n",
1217                                  engine->name,
1218                                  lower_32_bits(this->fence.seqno));
1219
1220                         GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
1221
1222                         rq[i] = i915_request_get(this);
1223                         i915_request_add(this);
1224                 }
1225
1226                 /* We expected a wrap! */
1227                 GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
1228
1229                 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1230                         pr_err("Wait for timeline wrap timed out!\n");
1231                         err = -EIO;
1232                         goto out;
1233                 }
1234
1235                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
1236                         if (!i915_request_completed(rq[i])) {
1237                                 pr_err("Pre-wrap request not completed!\n");
1238                                 err = -EINVAL;
1239                                 goto out;
1240                         }
1241                 }
1242
1243 out:
1244                 for (i = 0; i < ARRAY_SIZE(rq); i++)
1245                         i915_request_put(rq[i]);
1246                 st_engine_heartbeat_enable(engine);
1247                 if (err)
1248                         break;
1249         }
1250
1251         if (igt_flush_test(gt->i915))
1252                 err = -EIO;
1253
1254         return err;
1255 }
1256
1257 static int live_hwsp_rollover_user(void *arg)
1258 {
1259         struct intel_gt *gt = arg;
1260         struct intel_engine_cs *engine;
1261         enum intel_engine_id id;
1262         int err = 0;
1263
1264         /*
1265          * Simulate a long running user context, and force the seqno wrap
1266          * on the user's timeline.
1267          */
1268
1269         for_each_engine(engine, gt, id) {
1270                 struct i915_request *rq[3] = {};
1271                 struct intel_timeline *tl;
1272                 struct intel_context *ce;
1273                 int i;
1274
1275                 ce = intel_context_create(engine);
1276                 if (IS_ERR(ce))
1277                         return PTR_ERR(ce);
1278
1279                 err = intel_context_alloc_state(ce);
1280                 if (err)
1281                         goto out;
1282
1283                 tl = ce->timeline;
1284                 if (!tl->has_initial_breadcrumb)
1285                         goto out;
1286
1287                 err = intel_context_pin(ce);
1288                 if (err)
1289                         goto out;
1290
1291                 tl->seqno = -4u;
1292                 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
1293
1294                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
1295                         struct i915_request *this;
1296
1297                         this = intel_context_create_request(ce);
1298                         if (IS_ERR(this)) {
1299                                 err = PTR_ERR(this);
1300                                 goto out_unpin;
1301                         }
1302
1303                         pr_debug("%s: create fence.seqnp:%d\n",
1304                                  engine->name,
1305                                  lower_32_bits(this->fence.seqno));
1306
1307                         GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
1308
1309                         rq[i] = i915_request_get(this);
1310                         i915_request_add(this);
1311                 }
1312
1313                 /* We expected a wrap! */
1314                 GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
1315
1316                 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1317                         pr_err("Wait for timeline wrap timed out!\n");
1318                         err = -EIO;
1319                         goto out_unpin;
1320                 }
1321
1322                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
1323                         if (!i915_request_completed(rq[i])) {
1324                                 pr_err("Pre-wrap request not completed!\n");
1325                                 err = -EINVAL;
1326                                 goto out_unpin;
1327                         }
1328                 }
1329 out_unpin:
1330                 intel_context_unpin(ce);
1331 out:
1332                 for (i = 0; i < ARRAY_SIZE(rq); i++)
1333                         i915_request_put(rq[i]);
1334                 intel_context_put(ce);
1335                 if (err)
1336                         break;
1337         }
1338
1339         if (igt_flush_test(gt->i915))
1340                 err = -EIO;
1341
1342         return err;
1343 }
1344
1345 static int live_hwsp_recycle(void *arg)
1346 {
1347         struct intel_gt *gt = arg;
1348         struct intel_engine_cs *engine;
1349         enum intel_engine_id id;
1350         unsigned long count;
1351         int err = 0;
1352
1353         /*
1354          * Check seqno writes into one timeline at a time. We expect to
1355          * recycle the breadcrumb slot between iterations and neither
1356          * want to confuse ourselves or the GPU.
1357          */
1358
1359         count = 0;
1360         for_each_engine(engine, gt, id) {
1361                 IGT_TIMEOUT(end_time);
1362
1363                 if (!intel_engine_can_store_dword(engine))
1364                         continue;
1365
1366                 intel_engine_pm_get(engine);
1367
1368                 do {
1369                         struct intel_timeline *tl;
1370                         struct i915_request *rq;
1371
1372                         tl = intel_timeline_create(gt);
1373                         if (IS_ERR(tl)) {
1374                                 err = PTR_ERR(tl);
1375                                 break;
1376                         }
1377
1378                         rq = checked_tl_write(tl, engine, count);
1379                         if (IS_ERR(rq)) {
1380                                 intel_timeline_put(tl);
1381                                 err = PTR_ERR(rq);
1382                                 break;
1383                         }
1384
1385                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1386                                 pr_err("Wait for timeline writes timed out!\n");
1387                                 i915_request_put(rq);
1388                                 intel_timeline_put(tl);
1389                                 err = -EIO;
1390                                 break;
1391                         }
1392
1393                         if (READ_ONCE(*tl->hwsp_seqno) != count) {
1394                                 GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
1395                                               count, tl->fence_context,
1396                                               tl->hwsp_offset, *tl->hwsp_seqno);
1397                                 GEM_TRACE_DUMP();
1398                                 err = -EINVAL;
1399                         }
1400
1401                         i915_request_put(rq);
1402                         intel_timeline_put(tl);
1403                         count++;
1404
1405                         if (err)
1406                                 break;
1407                 } while (!__igt_timeout(end_time, NULL));
1408
1409                 intel_engine_pm_put(engine);
1410                 if (err)
1411                         break;
1412         }
1413
1414         return err;
1415 }
1416
1417 int intel_timeline_live_selftests(struct drm_i915_private *i915)
1418 {
1419         static const struct i915_subtest tests[] = {
1420                 SUBTEST(live_hwsp_recycle),
1421                 SUBTEST(live_hwsp_engine),
1422                 SUBTEST(live_hwsp_alternate),
1423                 SUBTEST(live_hwsp_wrap),
1424                 SUBTEST(live_hwsp_read),
1425                 SUBTEST(live_hwsp_rollover_kernel),
1426                 SUBTEST(live_hwsp_rollover_user),
1427         };
1428
1429         if (intel_gt_is_wedged(to_gt(i915)))
1430                 return 0;
1431
1432         return intel_gt_live_subtests(tests, to_gt(i915));
1433 }
This page took 0.163033 seconds and 4 git commands to generate.