// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"
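
/* Object sizes exercised by the live copy and clear selftests below. */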
static const unsigned int sizes[] = {

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;
	return i915_gem_object_create_internal(i915, size);
}
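
/*
 * copy() - exercise a blitter copy entry point.
 *
 * Fill a source object (lmem when available) with a known pattern, copy it
 * into a freshly created internal object via @fn under a ww transaction,
 * then spot-check one random u32 per page of the destination.
 */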
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;

	src = create_lmem_or_internal(i915, sz);

	dst = i915_gem_object_create_internal(i915, sz);

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);

		err = i915_gem_object_lock(dst, &ww);

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);

		for (i = 0; i < sz / sizeof(u32); i++)

		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);

		for (i = 0; i < sz / sizeof(u32); i++)

		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);

			i915_request_wait(rq, 0, HZ);

		i915_gem_object_unpin_map(dst);

		i915_gem_object_unpin_map(src);

	if (i915_request_wait(rq, 0, HZ) < 0) {
		pr_err("%ps timed out, size: %u\n", fn, sz);

	i915_request_put(rq);

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

	i915_gem_object_put(dst);

	i915_gem_object_put(src);
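
/*
 * Selftest-local CCS copy: bind the object's pages with emit_pte() and then
 * emit_copy_ccs() between the main surface and its compression control
 * surface, with DIRECT/INDIRECT access chosen by the write_to_ccs direction,
 * one CHUNK_SZ window per request.
 */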
static int intel_context_copy_ccs(struct intel_context *ce,
				  const struct i915_deps *deps,
				  struct scatterlist *sg,
				  unsigned int pat_index,
				  struct i915_request **out)
	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	if (HAS_64K_PAGES(ce->engine->i915))

		rq = i915_request_create(ce);

		err = i915_request_await_deps(rq, deps);

		if (rq->engine->emit_init_breadcrumb) {
			err = rq->engine->emit_init_breadcrumb(rq);

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);

		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		err = emit_copy_ccs(rq, offset, dst_access,
				    offset, src_access, len);

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))

intel_migrate_ccs_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *sg,
		       unsigned int pat_index,
		       struct i915_request **out)
	struct intel_context *ce;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);

	err = intel_context_copy_ccs(ce, deps, sg, pat_index,

	intel_context_unpin(ce);

	intel_context_put(ce);
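
/*
 * clear() - exercise a blitter clear/fill entry point.
 *
 * Fill an object with a pattern, clear (or fill) it via @fn, then verify the
 * object contents and, on FLAT_CCS capable lmem, that the CCS metadata was
 * cleared as well.
 */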
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	bool ccs_cap = false;

	obj = create_lmem_or_internal(i915, sz);

	/* Consider the rounded up memory too */

	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))

	for_i915_gem_ww(&ww, err, true) {
		int ccs_bytes, ccs_bytes_per_chunk;

		err = i915_gem_object_lock(obj, &ww);

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
			err = PTR_ERR(vaddr);

		for (i = 0; i < sz / sizeof(u32); i++)

		i915_gem_object_flush_map(obj);

		if (ccs_cap && !val) {
			/* Write the obj data into ccs surface */
			err = intel_migrate_ccs_copy(migrate, &ww, NULL,

				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",

				i915_request_put(rq);

		err = fn(migrate, &ww, obj, val, &rq);

			if (i915_request_wait(rq, 0, HZ) < 0) {
				pr_err("%ps timed out, size: %u\n", fn, sz);

			i915_request_put(rq);

		i915_gem_object_flush_map(obj);

		/* Verify the set/clear of the obj mem */
		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
				i915_prandom_u32_max_state(1024, prng);

			if (vaddr[x] != val) {
				pr_err("%ps failed, (%u != %u), offset: %zu\n",
				       fn, vaddr[x], val, x * sizeof(u32));
				igt_hexdump(vaddr + i * 1024, 4096);

		if (ccs_cap && !val) {
			for (i = 0; i < sz / sizeof(u32); i++)

			i915_gem_object_flush_map(obj);

			err = intel_migrate_ccs_copy(migrate, &ww, NULL,

				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",

				i915_request_put(rq);

			ccs_bytes = GET_CCS_BYTES(i915, sz);
			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
			i915_gem_object_flush_map(obj);

			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
				int offset = ((i * PAGE_SIZE) /
					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
				int x = i915_prandom_u32_max_state(min_t(int, 1024,
									 ccs_bytes_left), prng);

				if (vaddr[offset + x]) {
					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
					igt_hexdump(vaddr + offset,
						    ccs_bytes_left * sizeof(u32)));

		i915_gem_object_unpin_map(obj);

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq && err != -EINVAL) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);

		i915_gem_object_unpin_map(obj);

	i915_gem_object_put(obj);
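
/*
 * Thin wrappers binding the copy()/clear() harness to either the ww-aware
 * intel_migrate_* entry points or the intel_context_migrate_* variants that
 * run directly on the GT's migration context.
 */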
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->pat_index,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->pat_index,
				  i915_gem_object_is_lmem(dst),

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->pat_index,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->pat_index,
					  i915_gem_object_is_lmem(dst),

migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
	return copy(migrate, __migrate_copy, sz, prng);

global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
	return copy(migrate, __global_copy, sz, prng);

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   struct i915_request **out)
	return intel_migrate_clear(migrate, ww, NULL,
				   i915_gem_object_is_lmem(obj),

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  struct i915_request **out)
	return intel_context_migrate_clear(migrate->context, NULL,
					   i915_gem_object_is_lmem(obj),

migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
	return clear(migrate, __migrate_clear, sz, prng);

global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
	return clear(migrate, __global_clear, sz, prng);
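
/*
 * Walk the size table through both the intel_migrate_* and the
 * global-context paths, draining freed objects between iterations.
 */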
static int live_migrate_copy(void *arg)
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		err = migrate_copy(migrate, sizes[i], &prng);

			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);

static int live_migrate_clear(void *arg)
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		err = migrate_clear(migrate, sizes[i], &prng);

			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
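
/* A spinner paired with a timer, so a test can end a spinning batch after a delay. */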
struct spinner_timer {
	struct timer_list timer;
	struct igt_spinner spin;
};

static void spinner_kill(struct timer_list *timer)
{
	struct spinner_timer *st = from_timer(st, timer, timer);

	igt_spinner_end(&st->spin);
	pr_info("%s\n", __func__);
}

static int live_emit_pte_full_ring(void *arg)
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq, *prev;
	struct spinner_timer st;

	/*
	 * Simple regression test to check that we don't trample the
	 * rq->reserved_space when returning from emit_pte(), if the ring is
	 * nearly full.
	 */

	if (igt_spinner_init(&st.spin, to_gt(i915)))

	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);

	err = i915_gem_object_pin_pages_unlocked(obj);

	ce = intel_migrate_create_context(migrate);

	ce->ring_size = SZ_4K; /* Not too big */

	err = intel_context_pin(ce);

	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);

	i915_request_add(rq);
	if (!igt_wait_for_spinner(&st.spin, rq)) {

	/*
	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
	 * ring->reserved_space at the end. To actually emit the PTEs we require
	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
	 * ring space in emit_pte(), otherwise we trample on the reserved_space
	 * resulting in crashes when later submitting the rq.
	 */

			i915_request_add(rq);

		rq = i915_request_create(ce);

		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
			I915_EMIT_PTE_NUM_DWORDS;
		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
			   I915_EMIT_PTE_NUM_DWORDS);
		cs = intel_ring_begin(rq, sz);

		memset32(cs, MI_NOOP, sz);

		intel_ring_advance(rq, cs);

		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);

	} while (rq->ring->space > (rq->reserved_space +
				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));

	timer_setup_on_stack(&st.timer, spinner_kill, 0);
	mod_timer(&st.timer, jiffies + 2 * HZ);

	/*
	 * This should wait for the spinner to be killed, otherwise we should go
	 * down in flames when doing i915_request_add().
	 */
	pr_info("%s emit_pte ring space=%u\n", __func__, rq->ring->space);
	it = sg_sgt(obj->mm.pages->sgl);
	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);

	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
	del_timer_sync(&st.timer);
	destroy_timer_on_stack(&st.timer);

	intel_context_unpin(ce);

	intel_context_put(ce);

	i915_gem_object_put(obj);

	igt_spinner_fini(&st.spin);

struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};
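
/*
 * Run @fn concurrently on one kthread per online CPU (plus one), each thread
 * seeded with its own PRNG stream, then stop them all and collect any
 * non-zero exit status.
 */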
static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);

		get_task_struct(tsk);

	msleep(10 * n_cpus); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;

		if (IS_ERR_OR_NULL(tsk))

		status = kthread_stop_put(tsk);
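
/*
 * Each thread migrates a 2 * CHUNK_SZ object, so every operation is split
 * across more than one chunk-sized request.
 */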
static int __thread_migrate_copy(void *arg)
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);

static int thread_migrate_copy(void *arg)
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_copy, 0);

static int __thread_global_copy(void *arg)
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);

static int thread_global_copy(void *arg)
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_copy, 0);

static int __thread_migrate_clear(void *arg)
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);

static int __thread_global_clear(void *arg)
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);

static int thread_migrate_clear(void *arg)
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_clear, 0);

static int thread_global_clear(void *arg)
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_clear, 0);

int intel_migrate_live_selftests(struct drm_i915_private *i915)
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(live_emit_pte_full_ring),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)

	return intel_gt_live_subtests(tests, gt);
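
/*
 * Create an object for the perf measurements, preferring lmem when asked and
 * falling back to internal memory, returned locked with its pages pinned.
 */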
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
	struct drm_i915_gem_object *obj = NULL;

		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);

		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
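
/* sort() comparator for the ktime_t samples gathered by the perf tests. */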
static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}
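
/*
 * Time several blitter passes over the same sg list, sort the samples and
 * report a throughput figure weighted towards the middle of the distribution
 * (4 * sz bytes over t[1] + 2 * t[2] + t[3]), so that a single outlier pass
 * does not skew the result.
 */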
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    unsigned int pat_index,

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;

		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,

			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)

			i915_request_put(rq);

		t[pass] = ktime_sub(t1, t0);

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
			  t[1] + 2 * t[2] + t[3]) >> 20);

static int perf_clear_blt(void *arg)
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;

		dst = create_init_lmem_internal(gt, sizes[i], true);

		err = __perf_clear_blt(gt->migrate.context,
				       i915_gem_get_pat_index(gt->i915,
				       i915_gem_object_is_lmem(dst),

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);

static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   unsigned int src_pat_index,
			   struct scatterlist *dst,
			   unsigned int dst_pat_index,

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;

		err = intel_context_migrate_copy(ce, NULL,

			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)

			i915_request_put(rq);

		t[pass] = ktime_sub(t1, t0);

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
			  t[1] + 2 * t[2] + t[3]) >> 20);

static int perf_copy_blt(void *arg)
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;

		src = create_init_lmem_internal(gt, sizes[i], true);

		dst = create_init_lmem_internal(gt, sz, false);

		err = __perf_copy_blt(gt->migrate.context,
				      i915_gem_get_pat_index(gt->i915,
				      i915_gem_object_is_lmem(src),
				      i915_gem_get_pat_index(gt->i915,
				      i915_gem_object_is_lmem(dst),

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);

		i915_gem_object_unlock(src);
		i915_gem_object_put(src);

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))

	if (!gt->migrate.context)

	return intel_gt_live_subtests(tests, gt);