// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/sort.h>

#include <drm/drm_buddy.h>

#include "../i915_selftest.h"

#include "mock_drm.h"
#include "mock_gem_device.h"
#include "mock_region.h"

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_migrate.h"
#include "i915_memcpy.h"
#include "i915_ttm_buddy_manager.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

static void close_objects(struct intel_memory_region *mem,
                          struct list_head *objects)
{
        struct drm_i915_private *i915 = mem->i915;
        struct drm_i915_gem_object *obj, *on;

        list_for_each_entry_safe(obj, on, objects, st_link) {
                i915_gem_object_lock(obj, NULL);
                if (i915_gem_object_has_pinned_pages(obj))
                        i915_gem_object_unpin_pages(obj);
                /* No polluting the memory region between tests */
                __i915_gem_object_put_pages(obj);
                i915_gem_object_unlock(obj);
                list_del(&obj->st_link);
                i915_gem_object_put(obj);
        }

        cond_resched();

        i915_gem_drain_freed_objects(i915);
}

static int igt_mock_fill(void *arg)
{
        struct intel_memory_region *mem = arg;
        resource_size_t total = resource_size(&mem->region);
        resource_size_t page_size;
        resource_size_t rem;
        unsigned long max_pages;
        unsigned long page_num;
        LIST_HEAD(objects);
        int err = 0;

        page_size = PAGE_SIZE;
        max_pages = div64_u64(total, page_size);
        rem = total;

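        /*
         * Walk the object size over successive prime page counts, so the
         * region fills up with irregular, non-power-of-two allocations
         * until space runs out.
         */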
        for_each_prime_number_from(page_num, 1, max_pages) {
                resource_size_t size = page_num * page_size;
                struct drm_i915_gem_object *obj;

                obj = i915_gem_object_create_region(mem, size, 0, 0);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        break;
                }

                err = i915_gem_object_pin_pages_unlocked(obj);
                if (err) {
                        i915_gem_object_put(obj);
                        break;
                }

                list_add(&obj->st_link, &objects);
                rem -= size;
        }

        if (err == -ENOMEM)
                err = 0;
        if (err == -ENXIO) {
                if (page_num * page_size <= rem) {
                        pr_err("%s failed, space still left in region\n",
                               __func__);
                        err = -EINVAL;
                } else {
                        err = 0;
                }
        }

        close_objects(mem, &objects);

        return err;
}

static struct drm_i915_gem_object *
igt_object_create(struct intel_memory_region *mem,
                  struct list_head *objects,
                  u64 size,
                  unsigned int flags)
{
        struct drm_i915_gem_object *obj;
        int err;

        obj = i915_gem_object_create_region(mem, size, 0, flags);
        if (IS_ERR(obj))
                return obj;

        err = i915_gem_object_pin_pages_unlocked(obj);
        if (err)
                goto put;

        list_add(&obj->st_link, objects);
        return obj;

put:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

static void igt_object_release(struct drm_i915_gem_object *obj)
{
        i915_gem_object_lock(obj, NULL);
        i915_gem_object_unpin_pages(obj);
        __i915_gem_object_put_pages(obj);
        i915_gem_object_unlock(obj);
        list_del(&obj->st_link);
        i915_gem_object_put(obj);
}

static bool is_contiguous(struct drm_i915_gem_object *obj)
{
        struct scatterlist *sg;
        dma_addr_t addr = -1;

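        /* Contiguous iff every sg entry starts where the previous one ended. */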
        for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
                if (addr != -1 && sg_dma_address(sg) != addr)
                        return false;

                addr = sg_dma_address(sg) + sg_dma_len(sg);
        }

        return true;
}

static int igt_mock_reserve(void *arg)
{
        struct intel_memory_region *mem = arg;
        struct drm_i915_private *i915 = mem->i915;
        resource_size_t avail = resource_size(&mem->region);
        struct drm_i915_gem_object *obj;
        const u32 chunk_size = SZ_32M;
        u32 i, offset, count, *order;
        u64 allocated, cur_avail;
        I915_RND_STATE(prng);
        LIST_HEAD(objects);
        int err = 0;

        count = avail / chunk_size;
        order = i915_random_order(count, &prng);
        if (!order)
                return 0;

        mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
        if (IS_ERR(mem)) {
                pr_err("failed to create memory region\n");
                err = PTR_ERR(mem);
                goto out_free_order;
        }

        /* Reserve a bunch of ranges within the region */
        for (i = 0; i < count; ++i) {
                u64 start = order[i] * chunk_size;
                u64 size = i915_prandom_u32_max_state(chunk_size, &prng);

                /* Allow for some really big holes */
                if (!size)
                        continue;

                size = round_up(size, PAGE_SIZE);
                offset = igt_random_offset(&prng, 0, chunk_size, size,
                                           PAGE_SIZE);

                err = intel_memory_region_reserve(mem, start + offset, size);
                if (err) {
                        pr_err("%s failed to reserve range\n", __func__);
                        goto out_close;
                }

                /* XXX: maybe sanity check the block range here? */
                avail -= size;
        }

        /* Try to see if we can allocate from the remaining space */
        allocated = 0;
        cur_avail = avail;
        do {
                u32 size = i915_prandom_u32_max_state(cur_avail, &prng);

                size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE);
                obj = igt_object_create(mem, &objects, size, 0);
                if (IS_ERR(obj)) {
                        if (PTR_ERR(obj) == -ENXIO)
                                break;

                        err = PTR_ERR(obj);
                        goto out_close;
                }
                cur_avail -= size;
                allocated += size;
        } while (1);

        if (allocated != avail) {
                pr_err("%s mismatch between allocation and free space\n", __func__);
                err = -EINVAL;
        }

out_close:
        close_objects(mem, &objects);
        intel_memory_region_destroy(mem);
out_free_order:
        kfree(order);
        return err;
}

static int igt_mock_contiguous(void *arg)
{
        struct intel_memory_region *mem = arg;
        struct drm_i915_gem_object *obj;
        unsigned long n_objects;
        LIST_HEAD(objects);
        LIST_HEAD(holes);
        I915_RND_STATE(prng);
        resource_size_t total;
        resource_size_t min;
        u64 target;
        int err = 0;

        total = resource_size(&mem->region);

        /* Min size */
        obj = igt_object_create(mem, &objects, PAGE_SIZE,
                                I915_BO_ALLOC_CONTIGUOUS);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        if (!is_contiguous(obj)) {
                pr_err("%s min object spans disjoint sg entries\n", __func__);
                err = -EINVAL;
                goto err_close_objects;
        }

        igt_object_release(obj);

        /* Max size */
        obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        if (!is_contiguous(obj)) {
                pr_err("%s max object spans disjoint sg entries\n", __func__);
                err = -EINVAL;
                goto err_close_objects;
        }

        igt_object_release(obj);

        /* Internal fragmentation should not bleed into the object size */
        target = i915_prandom_u64_state(&prng);
        div64_u64_rem(target, total, &target);
        target = round_up(target, PAGE_SIZE);
        target = max_t(u64, PAGE_SIZE, target);

        obj = igt_object_create(mem, &objects, target,
                                I915_BO_ALLOC_CONTIGUOUS);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        if (obj->base.size != target) {
                pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
                       obj->base.size, target);
                err = -EINVAL;
                goto err_close_objects;
        }

        if (!is_contiguous(obj)) {
                pr_err("%s object spans disjoint sg entries\n", __func__);
                err = -EINVAL;
                goto err_close_objects;
        }

        igt_object_release(obj);

        /*
         * Try to fragment the address space, such that half of it is free, but
         * the max contiguous block size is SZ_64K.
         */

        target = SZ_64K;
        n_objects = div64_u64(total, target);

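        /*
         * Alternate allocations between the hole list and the object list;
         * releasing the holes below leaves half of the space free, but split
         * into SZ_64K chunks.
         */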
        while (n_objects--) {
                struct list_head *list;

                if (n_objects % 2)
                        list = &holes;
                else
                        list = &objects;

                obj = igt_object_create(mem, list, target,
                                        I915_BO_ALLOC_CONTIGUOUS);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        goto err_close_objects;
                }
        }

        close_objects(mem, &holes);

        min = target;
        target = total >> 1;

        /* Make sure we can still allocate all the fragmented space */
        obj = igt_object_create(mem, &objects, target, 0);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto err_close_objects;
        }

        igt_object_release(obj);

        /*
         * Even though we have enough free space, we don't have a big enough
         * contiguous block. Make sure that holds true.
         */

        do {
                bool should_fail = target > min;

                obj = igt_object_create(mem, &objects, target,
                                        I915_BO_ALLOC_CONTIGUOUS);
                if (should_fail != IS_ERR(obj)) {
                        pr_err("%s target allocation(%llx) mismatch\n",
                               __func__, target);
                        err = -EINVAL;
                        goto err_close_objects;
                }

                target >>= 1;
        } while (target >= PAGE_SIZE);

err_close_objects:
        list_splice_tail(&holes, &objects);
        close_objects(mem, &objects);
        return err;
}

static int igt_mock_splintered_region(void *arg)
{
        struct intel_memory_region *mem = arg;
        struct drm_i915_private *i915 = mem->i915;
        struct i915_ttm_buddy_resource *res;
        struct drm_i915_gem_object *obj;
        struct drm_buddy *mm;
        unsigned int expected_order;
        LIST_HEAD(objects);
        u64 size;
        int err = 0;

        /*
         * Sanity check that we can still allocate everything even if
         * mm.max_order does not cover mm.size, i.e. our starting address
         * space size is not a power-of-two.
         */

        size = (SZ_4G - 1) & PAGE_MASK;
        mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
        if (IS_ERR(mem))
                return PTR_ERR(mem);

        obj = igt_object_create(mem, &objects, size, 0);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto out_close;
        }

        res = to_ttm_buddy_resource(obj->mm.res);
        mm = res->mm;
        if (mm->size != size) {
                pr_err("%s size mismatch(%llu != %llu)\n",
                       __func__, mm->size, size);
                err = -EINVAL;
                goto out_put;
        }

        expected_order = get_order(rounddown_pow_of_two(size));
        if (mm->max_order != expected_order) {
                pr_err("%s order mismatch(%u != %u)\n",
                       __func__, mm->max_order, expected_order);
                err = -EINVAL;
                goto out_put;
        }

        close_objects(mem, &objects);

        /*
         * While we should be able to allocate everything without any flag
         * restrictions, with I915_BO_ALLOC_CONTIGUOUS we are actually limited
         * to the largest power-of-two of the region size, i.e. max_order, due
         * to the inner workings of the buddy allocator. So make sure that
         * does indeed hold true.
         */

        obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS);
        if (!IS_ERR(obj)) {
                pr_err("%s too large contiguous allocation was not rejected\n",
                       __func__);
                err = -EINVAL;
                goto out_close;
        }

        obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size),
                                I915_BO_ALLOC_CONTIGUOUS);
        if (IS_ERR(obj)) {
                pr_err("%s largest possible contiguous allocation failed\n",
                       __func__);
                err = PTR_ERR(obj);
                goto out_close;
        }

out_close:
        close_objects(mem, &objects);
out_put:
        intel_memory_region_destroy(mem);
        return err;
}
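
/*
 * Worked example of the numbers above (an illustrative aside assuming 4K
 * pages, not something the test itself computes): for size = (SZ_4G - 1) &
 * PAGE_MASK, rounddown_pow_of_two(size) is SZ_2G, so expected_order =
 * get_order(SZ_2G) = 19, and no contiguous allocation bigger than 2G can
 * ever succeed even though the region itself is nearly 4G.
 */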

#ifndef SZ_8G
#define SZ_8G BIT_ULL(33)
#endif

static int igt_mock_max_segment(void *arg)
{
        struct intel_memory_region *mem = arg;
        struct drm_i915_private *i915 = mem->i915;
        struct i915_ttm_buddy_resource *res;
        struct drm_i915_gem_object *obj;
        struct drm_buddy_block *block;
        struct drm_buddy *mm;
        struct list_head *blocks;
        struct scatterlist *sg;
        I915_RND_STATE(prng);
        LIST_HEAD(objects);
        unsigned int max_segment;
        unsigned int ps;
        u64 size;
        int err = 0;

        /*
         * While we may create very large contiguous blocks, we may need
         * to break those down for consumption elsewhere. In particular,
         * scatterlist elements used for dma-mapping carry an implicit
         * limit of UINT_MAX each.
         */

        size = SZ_8G;
        ps = PAGE_SIZE;
        if (i915_prandom_u64_state(&prng) & 1)
                ps = SZ_64K; /* For something like DG2 */

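        /*
         * Largest sg element length that is still a whole number of pages:
         * with ps = SZ_64K, for example, this is 0xffff0000 bytes, just
         * shy of 4G.
         */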
        max_segment = round_down(UINT_MAX, ps);

        mem = mock_region_create(i915, 0, size, ps, 0, 0);
        if (IS_ERR(mem))
                return PTR_ERR(mem);

        obj = igt_object_create(mem, &objects, size, 0);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto out_put;
        }

        res = to_ttm_buddy_resource(obj->mm.res);
        blocks = &res->blocks;
        mm = res->mm;
        size = 0;
        list_for_each_entry(block, blocks, link) {
                if (drm_buddy_block_size(mm, block) > size)
                        size = drm_buddy_block_size(mm, block);
        }
        if (size < max_segment) {
                pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n",
                       __func__, max_segment, size);
                err = -EINVAL;
                goto out_close;
        }

        for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
                dma_addr_t daddr = sg_dma_address(sg);

                if (sg->length > max_segment) {
                        pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
                               __func__, sg->length, max_segment);
                        err = -EINVAL;
                        goto out_close;
                }

                if (!IS_ALIGNED(daddr, ps)) {
                        pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n",
                               __func__, &daddr, ps);
                        err = -EINVAL;
                        goto out_close;
                }
        }

out_close:
        close_objects(mem, &objects);
out_put:
        intel_memory_region_destroy(mem);
        return err;
}

static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj)
{
        struct intel_memory_region *mr = obj->mm.region;
        struct i915_ttm_buddy_resource *bman_res =
                to_ttm_buddy_resource(obj->mm.res);
        struct drm_buddy *mm = bman_res->mm;
        struct drm_buddy_block *block;
        u64 total;

        total = 0;
        list_for_each_entry(block, &bman_res->blocks, link) {
                u64 start = drm_buddy_block_offset(block);
                u64 end = start + drm_buddy_block_size(mm, block);

                if (start < resource_size(&mr->io))
                        total += min_t(u64, end, resource_size(&mr->io)) - start;
        }

        return total;
}

static int igt_mock_io_size(void *arg)
{
        struct intel_memory_region *mr = arg;
        struct drm_i915_private *i915 = mr->i915;
        struct drm_i915_gem_object *obj;
        u64 mappable_theft_total;
        u64 io_size;
        u64 total;
        u64 ps;
        u64 rem;
        u64 size;
        I915_RND_STATE(prng);
        LIST_HEAD(objects);
        int err = 0;

        ps = SZ_4K;
        if (i915_prandom_u64_state(&prng) & 1)
                ps = SZ_64K; /* For something like DG2 */

        div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total);
        total = round_down(total, ps);
        total = max_t(u64, total, SZ_1G);

        div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size);
        io_size = round_down(io_size, ps);
        io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */

        pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n",
                __func__, ps, io_size, total);

        mr = mock_region_create(i915, 0, total, ps, 0, io_size);
        if (IS_ERR(mr)) {
                err = PTR_ERR(mr);
                goto out_err;
        }

        mappable_theft_total = 0;
        rem = total - io_size;
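        /*
         * Fill the non-mappable tail with GPU_ONLY objects; any blocks that
         * land below io_size are "stealing" mappable space, which we tally
         * up below.
         */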
        do {
                div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
                size = round_down(size, ps);
                size = max(size, ps);

                obj = igt_object_create(mr, &objects, size,
                                        I915_BO_ALLOC_GPU_ONLY);
                if (IS_ERR(obj)) {
                        pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n",
                               __func__, rem, size);
                        err = PTR_ERR(obj);
                        goto out_close;
                }

                mappable_theft_total += igt_object_mappable_total(obj);
                rem -= size;
        } while (rem);

        pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n",
                __func__,
                (u64)mappable_theft_total >> 20,
                (u64)io_size >> 20,
                (u64)total >> 20);

        /*
         * Even if we allocate all of the non-mappable portion, we should still
         * be able to dip into the mappable portion.
         */
        obj = igt_object_create(mr, &objects, io_size,
                                I915_BO_ALLOC_GPU_ONLY);
        if (IS_ERR(obj)) {
                pr_err("%s allocation unexpectedly failed\n", __func__);
                err = PTR_ERR(obj);
                goto out_close;
        }

        close_objects(mr, &objects);

        rem = io_size;
        do {
                div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
                size = round_down(size, ps);
                size = max(size, ps);

                obj = igt_object_create(mr, &objects, size, 0);
                if (IS_ERR(obj)) {
                        pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n",
                               __func__, rem, size);
                        err = PTR_ERR(obj);
                        goto out_close;
                }

                if (igt_object_mappable_total(obj) != size) {
                        pr_err("%s allocation is not mappable(size=%llx)\n",
                               __func__, size);
                        err = -EINVAL;
                        goto out_close;
                }
                rem -= size;
        } while (rem);

        /*
         * We assume CPU access is required by default, which should result in a
         * failure here, even though the non-mappable portion is free.
         */
        obj = igt_object_create(mr, &objects, ps, 0);
        if (!IS_ERR(obj)) {
                pr_err("%s allocation unexpectedly succeeded\n", __func__);
                err = -EINVAL;
                goto out_close;
        }

out_close:
        close_objects(mr, &objects);
        intel_memory_region_destroy(mr);
out_err:
        if (err == -ENOMEM)
                err = 0;

        return err;
}

static int igt_gpu_write_dw(struct intel_context *ce,
                            struct i915_vma *vma,
                            u32 dword,
                            u32 value)
{
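        /* Write the chosen dword offset in every page spanned by the vma. */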
        return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
                               vma->size >> PAGE_SHIFT, value);
}

static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
        unsigned long n = obj->base.size >> PAGE_SHIFT;
        u32 *ptr;
        int err;

        err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
        if (err)
                return err;

        ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(ptr))
                return PTR_ERR(ptr);

        ptr += dword;
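        /* Verify just that one dword in each page, stepping a page at a time. */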
        while (n--) {
                if (*ptr != val) {
                        pr_err("base[%u]=%08x, val=%08x\n",
                               dword, *ptr, val);
                        err = -EINVAL;
                        break;
                }

                ptr += PAGE_SIZE / sizeof(*ptr);
        }

        i915_gem_object_unpin_map(obj);
        return err;
}

static int igt_gpu_write(struct i915_gem_context *ctx,
                         struct drm_i915_gem_object *obj)
{
        struct i915_gem_engines *engines;
        struct i915_gem_engines_iter it;
        struct i915_address_space *vm;
        struct intel_context *ce;
        I915_RND_STATE(prng);
        IGT_TIMEOUT(end_time);
        unsigned int count;
        struct i915_vma *vma;
        int *order;
        int i, n;
        int err = 0;

        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

        n = 0;
        count = 0;
        for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
                count++;
                if (!intel_engine_can_store_dword(ce->engine))
                        continue;

                vm = ce->vm;
                n++;
        }
        i915_gem_context_unlock_engines(ctx);
        if (!n)
                return 0;

        order = i915_random_order(count * count, &prng);
        if (!order)
                return -ENOMEM;

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto out_free;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto out_free;

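        /*
         * Bounce the writes across the context's engines in a random order,
         * reading each one back from the CPU before moving on.
         */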
        i = 0;
        engines = i915_gem_context_lock_engines(ctx);
        do {
                u32 rng = prandom_u32_state(&prng);
                u32 dword = offset_in_page(rng) / 4;

                ce = engines->engines[order[i] % engines->num_engines];
                i = (i + 1) % (count * count);
                if (!ce || !intel_engine_can_store_dword(ce->engine))
                        continue;

                err = igt_gpu_write_dw(ce, vma, dword, rng);
                if (err)
                        break;

                i915_gem_object_lock(obj, NULL);
                err = igt_cpu_check(obj, dword, rng);
                i915_gem_object_unlock(obj);
                if (err)
                        break;
        } while (!__igt_timeout(end_time, NULL));
        i915_gem_context_unlock_engines(ctx);

out_free:
        kfree(order);

        if (err == -ENOMEM)
                err = 0;

        return err;
}

static int igt_lmem_create(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct drm_i915_gem_object *obj;
        int err = 0;

        obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        err = i915_gem_object_pin_pages_unlocked(obj);
        if (err)
                goto out_put;

        i915_gem_object_unpin_pages(obj);
out_put:
        i915_gem_object_put(obj);

        return err;
}

static int igt_lmem_create_with_ps(void *arg)
{
        struct drm_i915_private *i915 = arg;
        int err = 0;
        u32 ps;

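        /*
         * Walk the power-of-two minimum page sizes up to 1G, checking that
         * each object's backing store is both sized and dma-aligned to ps.
         */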
        for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) {
                struct drm_i915_gem_object *obj;
                dma_addr_t daddr;

                obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        if (err == -ENXIO || err == -E2BIG) {
                                pr_info("%s not enough lmem for ps(%u) err=%d\n",
                                        __func__, ps, err);
                                err = 0;
                        }

                        break;
                }

                if (obj->base.size != ps) {
                        pr_err("%s size(%zu) != ps(%u)\n",
                               __func__, obj->base.size, ps);
                        err = -EINVAL;
                        goto out_put;
                }

                i915_gem_object_lock(obj, NULL);
                err = i915_gem_object_pin_pages(obj);
                if (err) {
                        if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
                                pr_info("%s not enough lmem for ps(%u) err=%d\n",
                                        __func__, ps, err);
                                err = 0;
                        }
                        goto out_put;
                }

                daddr = i915_gem_object_get_dma_address(obj, 0);
                if (!IS_ALIGNED(daddr, ps)) {
                        pr_err("%s daddr(%pa) not aligned with ps(%u)\n",
                               __func__, &daddr, ps);
                        err = -EINVAL;
                        goto out_unpin;
                }

out_unpin:
                i915_gem_object_unpin_pages(obj);
                __i915_gem_object_put_pages(obj);
out_put:
                i915_gem_object_unlock(obj);
                i915_gem_object_put(obj);

                if (err)
                        break;
        }

        return err;
}

static int igt_lmem_create_cleared_cpu(void *arg)
{
        struct drm_i915_private *i915 = arg;
        I915_RND_STATE(prng);
        IGT_TIMEOUT(end_time);
        u32 size, i;
        int err;

        i915_gem_drain_freed_objects(i915);

        size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng));
        size = round_up(size, PAGE_SIZE);
        i = 0;

        do {
                struct drm_i915_gem_object *obj;
                unsigned int flags;
                u32 dword, val;
                void *vaddr;

                /*
                 * Alternate between cleared and uncleared allocations, while
                 * also dirtying the pages each time, to check that the pages
                 * are always cleared when requested. Since we are the only
                 * user, we should see some, if not complete, reuse of the
                 * underlying pages between iterations.
                 */

                flags = I915_BO_ALLOC_CPU_CLEAR;
                if (i & 1)
                        flags = 0;

                obj = i915_gem_object_create_lmem(i915, size, flags);
                if (IS_ERR(obj))
                        return PTR_ERR(obj);

                i915_gem_object_lock(obj, NULL);
                err = i915_gem_object_pin_pages(obj);
                if (err)
                        goto out_put;

                dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32),
                                                   &prng);

                if (flags & I915_BO_ALLOC_CPU_CLEAR) {
                        err = igt_cpu_check(obj, dword, 0);
                        if (err) {
                                pr_err("%s failed with size=%u, flags=%u\n",
                                       __func__, size, flags);
                                goto out_unpin;
                        }
                }

                vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto out_unpin;
                }

                val = prandom_u32_state(&prng);

                memset32(vaddr, val, obj->base.size / sizeof(u32));

                i915_gem_object_flush_map(obj);
                i915_gem_object_unpin_map(obj);
out_unpin:
                i915_gem_object_unpin_pages(obj);
                __i915_gem_object_put_pages(obj);
out_put:
                i915_gem_object_unlock(obj);
                i915_gem_object_put(obj);

                if (err)
                        break;
                ++i;
        } while (!__igt_timeout(end_time, NULL));

        pr_info("%s completed (%u) iterations\n", __func__, i);

        return err;
}

static int igt_lmem_write_gpu(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct drm_i915_gem_object *obj;
        struct i915_gem_context *ctx;
        struct file *file;
        I915_RND_STATE(prng);
        u32 sz;
        int err;

        file = mock_file(i915);
        if (IS_ERR(file))
                return PTR_ERR(file);

        ctx = live_context(i915, file);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
                goto out_file;
        }

        sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);

        obj = i915_gem_object_create_lmem(i915, sz, 0);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto out_file;
        }

        err = i915_gem_object_pin_pages_unlocked(obj);
        if (err)
                goto out_put;

        err = igt_gpu_write(ctx, obj);
        if (err)
                pr_err("igt_gpu_write failed(%d)\n", err);

        i915_gem_object_unpin_pages(obj);
out_put:
        i915_gem_object_put(obj);
out_file:
        fput(file);
        return err;
}

static struct intel_engine_cs *
random_engine_class(struct drm_i915_private *i915,
                    unsigned int class,
                    struct rnd_state *prng)
{
        struct intel_engine_cs *engine;
        unsigned int count;

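        /*
         * Count the user-visible engines of this class by walking the uabi
         * rbtree, then pick one of them uniformly at random.
         */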
        count = 0;
        for (engine = intel_engine_lookup_user(i915, class, 0);
             engine && engine->uabi_class == class;
             engine = rb_entry_safe(rb_next(&engine->uabi_node),
                                    typeof(*engine), uabi_node))
                count++;

        count = i915_prandom_u32_max_state(count, prng);
        return intel_engine_lookup_user(i915, class, count);
}

static int igt_lmem_write_cpu(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct drm_i915_gem_object *obj;
        I915_RND_STATE(prng);
        IGT_TIMEOUT(end_time);
        u32 bytes[] = {
                0, /* rng placeholder */
                sizeof(u32),
                sizeof(u64),
                64, /* cl */
                PAGE_SIZE,
                PAGE_SIZE - sizeof(u32),
                PAGE_SIZE - sizeof(u64),
                PAGE_SIZE - 64,
        };
        struct intel_engine_cs *engine;
        struct i915_request *rq;
        u32 *vaddr;
        u32 sz;
        u32 i;
        int *order;
        int count;
        int err;

        engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng);
        if (!engine)
                return 0;

        pr_info("%s: using %s\n", __func__, engine->name);

        sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
        sz = max_t(u32, 2 * PAGE_SIZE, sz);

        obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
        if (IS_ERR(vaddr)) {
                err = PTR_ERR(vaddr);
                goto out_put;
        }

        i915_gem_object_lock(obj, NULL);

        err = dma_resv_reserve_fences(obj->base.resv, 1);
        if (err) {
                i915_gem_object_unlock(obj);
                goto out_put;
        }

        /* Put the pages into a known state -- from the gpu for added fun */
        intel_engine_pm_get(engine);
        err = intel_context_migrate_clear(engine->gt->migrate.context, NULL,
                                          obj->mm.pages->sgl,
                                          i915_gem_get_pat_index(i915,
                                                                 I915_CACHE_NONE),
                                          true, 0xdeadbeaf, &rq);
        if (rq) {
                dma_resv_add_fence(obj->base.resv, &rq->fence,
                                   DMA_RESV_USAGE_WRITE);
                i915_request_put(rq);
        }

        intel_engine_pm_put(engine);
        if (!err)
                err = i915_gem_object_set_to_wc_domain(obj, true);
        i915_gem_object_unlock(obj);
        if (err)
                goto out_unpin;

        count = ARRAY_SIZE(bytes);
        order = i915_random_order(count * count, &prng);
        if (!order) {
                err = -ENOMEM;
                goto out_unpin;
        }

        /* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
        bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
        GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));

        i = 0;
        do {
                u32 offset;
                u32 align;
                u32 dword;
                u32 size;
                u32 val;

                size = bytes[order[i] % count];
                i = (i + 1) % (count * count);

                align = bytes[order[i] % count];
                i = (i + 1) % (count * count);

                align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align));

                offset = igt_random_offset(&prng, 0, obj->base.size,
                                           size, align);

                val = prandom_u32_state(&prng);
                memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf,
                         size / sizeof(u32));

                /*
                 * Sample random dw -- don't waste precious time reading every
                 * single dw.
                 */
                dword = igt_random_offset(&prng, offset,
                                          offset + size,
                                          sizeof(u32), sizeof(u32));
                dword /= sizeof(u32);
                if (vaddr[dword] != (val ^ 0xdeadbeaf)) {
                        pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n",
                               __func__, dword, vaddr[dword], val ^ 0xdeadbeaf,
                               size, align, offset);
                        err = -EINVAL;
                        break;
                }
        } while (!__igt_timeout(end_time, NULL));

out_unpin:
        i915_gem_object_unpin_map(obj);
out_put:
        i915_gem_object_put(obj);

        return err;
}

static const char *repr_type(u32 type)
{
        switch (type) {
        case I915_MAP_WB:
                return "WB";
        case I915_MAP_WC:
                return "WC";
        }

        return "";
}

static struct drm_i915_gem_object *
create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
                          void **out_addr)
{
        struct drm_i915_gem_object *obj;
        void *addr;

        obj = i915_gem_object_create_region(mr, size, 0, 0);
        if (IS_ERR(obj)) {
                if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
                        return ERR_PTR(-ENODEV);
                return obj;
        }

        addr = i915_gem_object_pin_map_unlocked(obj, type);
        if (IS_ERR(addr)) {
                i915_gem_object_put(obj);
                if (PTR_ERR(addr) == -ENXIO)
                        return ERR_PTR(-ENODEV);
                return addr;
        }

        *out_addr = addr;
        return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
        const ktime_t *a = A, *b = B;

        return ktime_compare(*a, *b);
}

static void igt_memcpy_long(void *dst, const void *src, size_t size)
{
        unsigned long *tmp = dst;
        const unsigned long *s = src;

        size = size / sizeof(unsigned long);
        while (size--)
                *tmp++ = *s++;
}

static inline void igt_memcpy(void *dst, const void *src, size_t size)
{
        memcpy(dst, src, size);
}

static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
{
        i915_memcpy_from_wc(dst, src, size);
}

static int _perf_memcpy(struct intel_memory_region *src_mr,
                        struct intel_memory_region *dst_mr,
                        u64 size, u32 src_type, u32 dst_type)
{
        struct drm_i915_private *i915 = src_mr->i915;
        const struct {
                const char *name;
                void (*copy)(void *dst, const void *src, size_t size);
                bool skip;
        } tests[] = {
                {
                        "memcpy",
                        igt_memcpy,
                },
                {
                        "memcpy_long",
                        igt_memcpy_long,
                },
                {
                        "memcpy_from_wc",
                        igt_memcpy_from_wc,
                        !i915_has_memcpy_from_wc(),
                },
        };
        struct drm_i915_gem_object *src, *dst;
        void *src_addr, *dst_addr;
        int ret = 0;
        int i;

        src = create_region_for_mapping(src_mr, size, src_type, &src_addr);
        if (IS_ERR(src)) {
                ret = PTR_ERR(src);
                goto out;
        }

        dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr);
        if (IS_ERR(dst)) {
                ret = PTR_ERR(dst);
                goto out_unpin_src;
        }

        for (i = 0; i < ARRAY_SIZE(tests); ++i) {
                ktime_t t[5];
                int pass;

                if (tests[i].skip)
                        continue;

                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        tests[i].copy(dst_addr, src_addr, size);

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                if (t[0] <= 0) {
                        /* ignore the impossible to protect our sanity */
                        pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n",
                                 __func__,
                                 src_mr->name, repr_type(src_type),
                                 dst_mr->name, repr_type(dst_type),
                                 tests[i].name, size >> 10,
                                 t[0], t[4]);
                        continue;
                }

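                /*
                 * Report throughput as a 1:2:1 weighted mean of the three
                 * middle samples (hence 4 * size in the numerator); t[] is
                 * in ns, so scale by 1e9 for bytes/s and shift by 20 for
                 * MiB/s.
                 */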
                pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n",
                        __func__,
                        src_mr->name, repr_type(src_type),
                        dst_mr->name, repr_type(dst_type),
                        tests[i].name, size >> 10,
                        div64_u64(mul_u32_u32(4 * size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);

                cond_resched();
        }

        i915_gem_object_unpin_map(dst);
        i915_gem_object_put(dst);
out_unpin_src:
        i915_gem_object_unpin_map(src);
        i915_gem_object_put(src);

        i915_gem_drain_freed_objects(i915);
out:
        if (ret == -ENODEV)
                ret = 0;

        return ret;
}

static int perf_memcpy(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const u32 types[] = {
                I915_MAP_WB,
                I915_MAP_WC,
        };
        static const u32 sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_4M,
        };
        struct intel_memory_region *src_mr, *dst_mr;
        int src_id, dst_id;
        int i, j, k;
        int ret;

        for_each_memory_region(src_mr, i915, src_id) {
                for_each_memory_region(dst_mr, i915, dst_id) {
                        for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
                                for (j = 0; j < ARRAY_SIZE(types); ++j) {
                                        for (k = 0; k < ARRAY_SIZE(types); ++k) {
                                                ret = _perf_memcpy(src_mr,
                                                                   dst_mr,
                                                                   sizes[i],
                                                                   types[j],
                                                                   types[k]);
                                                if (ret)
                                                        return ret;
                                        }
                                }
                        }
                }
        }

        return 0;
}

int intel_memory_region_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_mock_reserve),
                SUBTEST(igt_mock_fill),
                SUBTEST(igt_mock_contiguous),
                SUBTEST(igt_mock_splintered_region),
                SUBTEST(igt_mock_max_segment),
                SUBTEST(igt_mock_io_size),
        };
        struct intel_memory_region *mem;
        struct drm_i915_private *i915;
        int err;

        i915 = mock_gem_device();
        if (!i915)
                return -ENOMEM;

        mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
        if (IS_ERR(mem)) {
                pr_err("failed to create memory region\n");
                err = PTR_ERR(mem);
                goto out_unref;
        }

        err = i915_subtests(tests, mem);

        intel_memory_region_destroy(mem);
out_unref:
        mock_destroy_device(i915);
        return err;
}

int intel_memory_region_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_lmem_create),
                SUBTEST(igt_lmem_create_with_ps),
                SUBTEST(igt_lmem_create_cleared_cpu),
                SUBTEST(igt_lmem_write_cpu),
                SUBTEST(igt_lmem_write_gpu),
        };

        if (!HAS_LMEM(i915)) {
                pr_info("device lacks LMEM support, skipping\n");
                return 0;
        }

        if (intel_gt_is_wedged(to_gt(i915)))
                return 0;

        return i915_live_subtests(tests, i915);
}

int intel_memory_region_perf_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(perf_memcpy),
        };

        if (intel_gt_is_wedged(to_gt(i915)))
                return 0;

        return i915_live_subtests(tests, i915);
}