// SPDX-License-Identifier: MIT
/*
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "selftests/igt_flush_test.h"
#include "selftests/lib_sw_fence.h"
#include "selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES;
}
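/*
 * Note: hwsp_cacheline() folds the kernel virtual address of the backing
 * page together with the offset of this timeline's seqno slot, scaled by
 * the per-slot stride, so every pinned timeline maps to a unique index.
 * The mock freelist test below uses that index as a radix-tree key to
 * catch two timelines being handed the same HWSP slot.
 */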
static int selftest_tl_pin(struct intel_timeline *tl)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(tl->hwsp_ggtt->obj, &ww);
	if (!err)
		err = intel_timeline_pin(tl, &ww);

	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}
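/*
 * The ww (wait/wound) pattern above is the usual i915 retry loop: take the
 * object lock and pin under it, and if the lock ordering would deadlock
 * (-EDEADLK), back off and retry the whole sequence until it either
 * succeeds or fails with a real error.
 */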
/* Only half of seqno's are usable, see __intel_timeline_get_seqno() */
#define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES / 2)
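/*
 * Worked example, assuming 4KiB pages and an 8-byte TIMELINE_SEQNO_BYTES
 * stride (the values the driver uses at the time of writing): a page holds
 * 4096 / 8 = 512 seqno slots, of which only every other one is handed out,
 * giving CACHELINES_PER_PAGE = 256 usable slots per page.
 */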
struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_unpin(tl);
		intel_timeline_put(tl);
	}
}
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		err = selftest_tl_pin(tl);
		if (err) {
			intel_timeline_put(tl);
			return err;
		}

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_unpin(tl);
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}
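/*
 * The freelist exerciser above keeps a bounded "history" of pinned
 * timelines as its working set: each new timeline is recorded (evicting
 * and releasing whatever previously occupied that slot), and its HWSP
 * index is inserted into the radix tree so that reuse of a slot which is
 * still pinned shows up as -EEXIST. Optionally the history is shuffled
 * and a random number of entries released before the next round.
 */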
static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSP do not overlap.
	 * Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	mock_destroy_device(i915);
	return err;
}
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}
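/*
 * Reading the pass[] table above: each row is (name, seqno, expected
 * result of __intel_timeline_sync_is_later(), whether to then record the
 * seqno with __intel_timeline_sync_set()). The sequence walks the seqno
 * space forwards through INT_MAX and UINT_MAX and back to 0, so the u32
 * wrap ("wrap"/"unwrap") is exercised with the usual seqno before/after
 * semantics.
 */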
static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself becomes a significant factor in the per-iteration
	 * timings. We try to compensate the results by measuring the overhead
	 * of the prng and subtract it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
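	/*
	 * prng32_1M is the measured cost of one 32-bit prandom draw in
	 * nanoseconds, kept in fixed point (scaled by 1 << 20) to avoid a
	 * per-iteration division. The later "(count * prng32_1M * 2) >> 20"
	 * corrections subtract the cost of the two 32-bit draws that make up
	 * each 64-bit id from the measured loop time.
	 */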
	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();
	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();
	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();
	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (and anomalies).
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}
int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (GRAPHICS_VER(rq->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (GRAPHICS_VER(rq->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}
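/*
 * The three variants above emit the same 4-dword store in the layout each
 * generation expects: gen8+ takes the address split over two dwords (lo,
 * hi) before the value; gen4-gen7 take a dummy dword, then the address,
 * then the value; older parts use MI_STORE_DWORD_IMM with MI_MEM_VIRTUAL
 * and a trailing MI_NOOP to keep the emission a fixed four dwords.
 */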
static struct i915_request *
checked_tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = selftest_tl_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_unpin(tl);
		return ERR_PTR(-EINVAL);
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}
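/*
 * Note on ownership: on success checked_tl_write() returns a request
 * reference (taken with i915_request_get() before i915_request_add())
 * that the caller must drop with i915_request_put(); the temporary
 * timeline pin is released again before returning on all paths.
 */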
static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = checked_tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}
static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = checked_tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}
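/*
 * live_hwsp_engine and live_hwsp_alternate cover the same check with the
 * loop nesting swapped: the former writes NUM_TIMELINES breadcrumbs from
 * one engine before moving on to the next, while the latter interleaves
 * the engines so that neighbouring HWSP slots are written by different
 * engines back to back.
 */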
static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	err = selftest_tl_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}
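/*
 * The wrap check works because intel_timeline_get_seqno() is called twice
 * with tl->seqno seeded close to the wrap point: the second call rolls the
 * timeline over and must hand out a fresh HWSP slot (hence the GEM_BUG_ON
 * asserting that the two hwsp_seqno pointers differ), while the GGTT
 * stores prove that both the old and the new slot still land in memory
 * the CPU can read back after the request completes.
 */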
static int emit_read_hwsp(struct i915_request *rq,
			  u32 seqno, u32 hwsp,
			  u32 *addr)
{
	const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = *addr;
	*cs++ = 0;
	*cs++ = seqno;
	*addr += 4;

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = hwsp;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = *addr;
	*cs++ = 0;
	*addr += 4;

	intel_ring_advance(rq, cs);

	return 0;
}
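/*
 * Each call emits three packets: an MI_STORE_DWORD_IMM recording the
 * request's own seqno at *addr, an MI_LOAD_REGISTER_MEM pulling the live
 * HWSP value into CS GPR0, and an MI_STORE_REGISTER_MEM writing that GPR
 * back out at *addr + 4. The watcher buffer therefore fills with
 * (seqno, hwsp) pairs that check_watcher() later compares.
 */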
struct hwsp_watcher {
	struct i915_vma *vma;
	struct i915_request *rq;
	u32 addr;
	u32 *map;
};

static bool cmp_lt(u32 a, u32 b)
{
	return a < b;
}

static bool cmp_gte(u32 a, u32 b)
{
	return a >= b;
}
static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(w->map)) {
		i915_gem_object_put(obj);
		return PTR_ERR(w->map);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return PTR_ERR(vma);
	}

	w->vma = vma;
	w->addr = i915_ggtt_offset(vma);
	return 0;
}
static void switch_tl_lock(struct i915_request *from, struct i915_request *to)
{
	/* some light mutex juggling required; think co-routines */

	if (from) {
		lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie);
		mutex_unlock(&from->context->timeline->mutex);
	}

	if (to) {
		mutex_lock(&to->context->timeline->mutex);
		to->cookie = lockdep_pin_lock(&to->context->timeline->mutex);
	}
}
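/*
 * The watcher requests sit on a different timeline than the request under
 * test, and a timeline's mutex is normally held (and pinned by lockdep)
 * from request creation until i915_request_add(). switch_tl_lock() hands
 * that "currently held" lock and its lockdep pin cookie over from one
 * in-flight request to the other, so the test can alternate between
 * emitting into the watcher and into the target without holding both
 * mutexes at once.
 */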
static int create_watcher(struct hwsp_watcher *w,
			  struct intel_engine_cs *engine,
			  int ringsz)
{
	struct intel_context *ce;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->ring = __intel_context_ring_size(ringsz);
	w->rq = intel_context_create_request(ce);
	intel_context_put(ce);
	if (IS_ERR(w->rq))
		return PTR_ERR(w->rq);

	w->addr = i915_ggtt_offset(w->vma);

	switch_tl_lock(w->rq, NULL);

	return 0;
}
static int check_watcher(struct hwsp_watcher *w, const char *name,
			 bool (*op)(u32 hwsp, u32 seqno))
{
	struct i915_request *rq = fetch_and_zero(&w->rq);
	u32 offset, end;
	int err;

	GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);

	i915_request_get(rq);
	switch_tl_lock(NULL, rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ) < 0) {
		err = -ETIME;
		goto out;
	}

	err = 0;
	offset = 0;
	end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
	while (offset < end) {
		if (!op(w->map[offset + 1], w->map[offset])) {
			pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
			       name, w->map[offset + 1], w->map[offset]);
			err = -EINVAL;
		}

		offset += 2;
	}

out:
	i915_request_put(rq);
	return err;
}
static void cleanup_watcher(struct hwsp_watcher *w)
{
	if (w->rq) {
		switch_tl_lock(NULL, w->rq);

		i915_request_add(w->rq);
	}

	i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
}
static bool retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	mutex_lock(&tl->mutex);
	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
	mutex_unlock(&tl->mutex);

	return !i915_active_fence_isset(&tl->last_request);
}
static struct i915_request *wrap_timeline(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	struct intel_timeline *tl = ce->timeline;
	u32 seqno = rq->fence.seqno;

	while (tl->seqno >= seqno) { /* Cause a wrap */
		i915_request_put(rq);
		rq = intel_context_create_request(ce);
		if (IS_ERR(rq))
			return rq;

		i915_request_get(rq);
		i915_request_add(rq);
	}

	i915_request_put(rq);
	rq = i915_request_create(ce);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}
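/*
 * wrap_timeline() keeps submitting (and immediately releasing) requests on
 * the same context until the timeline's seqno wraps back below the
 * original fence's seqno, then returns one final post-wrap request for the
 * caller to track.
 */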
static int live_hwsp_read(void *arg)
{
	struct intel_gt *gt = arg;
	struct hwsp_watcher watcher[2] = {};
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;
	int i;

	/*
	 * If we take a reference to the HWSP for reading on the GPU, that
	 * read may be arbitrarily delayed (either by foreign fence or
	 * priority saturation) and a wrap can happen within 30 minutes.
	 * When the GPU read is finally submitted it should be correct,
	 * even across multiple wraps.
	 */

	if (GRAPHICS_VER(gt->i915) < 8) /* CS convenience [SRM/LRM] */
		return 0;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
		err = setup_watcher(&watcher[i], gt);
		if (err)
			goto out;
	}
	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long count = 0;
		IGT_TIMEOUT(end_time);

		/* Create a request we can use for remote reading of the HWSP */
		err = create_watcher(&watcher[1], engine, SZ_512K);
		if (err)
			goto out;

		do {
			struct i915_sw_fence *submit;
			struct i915_request *rq;
			u32 hwsp, dummy;

			submit = heap_fence_create(GFP_KERNEL);
			if (!submit) {
				err = -ENOMEM;
				goto out;
			}

			err = create_watcher(&watcher[0], engine, SZ_4K);
			if (err)
				goto out;
			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			ce->timeline = intel_timeline_get(tl);

			/* Ensure timeline is mapped, done during first pin */
			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			/*
			 * Start at a new wrap, and set seqno right before another wrap,
			 * saving 30 minutes of nops
			 */
			tl->seqno = -12u + 2 * (count & 3);
			__intel_timeline_get_seqno(tl, &dummy);

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&rq->submit,
							    &watcher[0].rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}
			switch_tl_lock(rq, watcher[0].rq);
			err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[0].rq, /* before */
						     rq->fence.seqno, hwsp,
						     &watcher[0].addr);
			switch_tl_lock(watcher[0].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[1].rq);
			err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[1].rq, /* after */
						     rq->fence.seqno, hwsp,
						     &watcher[1].addr);
			switch_tl_lock(watcher[1].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}
			i915_request_get(rq);
			i915_request_add(rq);

			rq = wrap_timeline(rq);
			intel_context_unpin(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
							    &rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_put(rq);
				goto out;
			}

			err = check_watcher(&watcher[0], "before", cmp_lt);
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			if (err) {
				i915_request_put(rq);
				goto out;
			}
			count++;

			/* Flush the timeline before manually wrapping again */
			if (i915_request_wait(rq,
					      I915_WAIT_INTERRUPTIBLE,
					      HZ) < 0) {
				err = -ETIME;
				i915_request_put(rq);
				goto out;
			}
			retire_requests(tl);
			i915_request_put(rq);

			/* Single requests are limited to half a ring at most */
			if (8 * watcher[1].rq->ring->emit >
			    3 * watcher[1].rq->ring->size)
				break;

		} while (!__igt_timeout(end_time, NULL) &&
			 count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - 1) / 2);

		pr_info("%s: simulated %lu wraps\n", engine->name, count);
		err = check_watcher(&watcher[1], "after", cmp_gte);
		if (err)
			goto out;
	}

out:
	for (i = 0; i < ARRAY_SIZE(watcher); i++)
		cleanup_watcher(&watcher[i]);

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_free:
	intel_timeline_put(tl);
	return err;
}
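/*
 * Summary of the wrap simulation above: watcher[0] samples the HWSP before
 * the request under test is known to have completed, so every recorded
 * HWSP value must still be behind the paired seqno (cmp_lt), while
 * watcher[1] is gated on the post-wrap request and so must observe a HWSP
 * value at or beyond it (cmp_gte). Each iteration reseeds tl->seqno close
 * to the wrap point, so a handful of loops stands in for the ~30 minutes
 * of real submissions a wrap would otherwise take.
 */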
static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
		tl->seqno = -2u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}
static int live_hwsp_rollover_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Simulate a long running user context, and force the seqno wrap
	 * on the user's timeline.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq[3] = {};
		struct intel_timeline *tl;
		struct intel_context *ce;
		int i;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		err = intel_context_alloc_state(ce);
		if (err)
			goto out;

		tl = ce->timeline;
		if (!tl->has_initial_breadcrumb)
			goto out;

		err = intel_context_pin(ce);
		if (err)
			goto out;

		tl->seqno = -4u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = intel_context_create_request(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_unpin;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out_unpin;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out_unpin;
			}
		}
out_unpin:
		intel_context_unpin(ce);
out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}
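/*
 * Both rollover tests above rely on the same trick: tl->seqno is preloaded
 * near the top of the u32 range and mirrored into the HWSP with the
 * WRITE_ONCE above, so the third of three back to back requests is
 * guaranteed to be assigned a post-wrap seqno (checked by the GEM_BUG_ON
 * comparing rq[2] against rq[0]) while all three must still retire
 * normally.
 */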
static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and neither
	 * want to confuse ourselves or the GPU.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = checked_tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (READ_ONCE(*tl->hwsp_seqno) != count) {
				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
					      count, tl->fence_context,
					      tl->hwsp_offset, *tl->hwsp_seqno);
				GEM_TRACE_DUMP();
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}
int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
		SUBTEST(live_hwsp_read),
		SUBTEST(live_hwsp_rollover_kernel),
		SUBTEST(live_hwsp_rollover_user),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}