drivers/gpu/drm/i915/selftests/intel_memory_region.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2019 Intel Corporation
   4  */
   5
   6 #include <linux/prime_numbers.h>
   7 #include <linux/sort.h>
   8
   9 #include <drm/drm_buddy.h>
  10
  11 #include "../i915_selftest.h"
  12
  13 #include "mock_drm.h"
  14 #include "mock_gem_device.h"
  15 #include "mock_region.h"
  16
  17 #include "gem/i915_gem_context.h"
  18 #include "gem/i915_gem_lmem.h"
  19 #include "gem/i915_gem_region.h"
  20 #include "gem/i915_gem_ttm.h"
  21 #include "gem/selftests/igt_gem_utils.h"
  22 #include "gem/selftests/mock_context.h"
  23 #include "gt/intel_engine_pm.h"
  24 #include "gt/intel_engine_user.h"
  25 #include "gt/intel_gt.h"
  26 #include "gt/intel_migrate.h"
  27 #include "i915_memcpy.h"
  28 #include "i915_ttm_buddy_manager.h"
  29 #include "selftests/igt_flush_test.h"
  30 #include "selftests/i915_random.h"
  31
  32 static void close_objects(struct intel_memory_region *mem,
  33                           struct list_head *objects)
  34 {
  35         struct drm_i915_private *i915 = mem->i915;
  36         struct drm_i915_gem_object *obj, *on;
  37
  38         list_for_each_entry_safe(obj, on, objects, st_link) {
  39                 i915_gem_object_lock(obj, NULL);
  40                 if (i915_gem_object_has_pinned_pages(obj))
  41                         i915_gem_object_unpin_pages(obj);
  42                 /* No polluting the memory region between tests */
  43                 __i915_gem_object_put_pages(obj);
  44                 i915_gem_object_unlock(obj);
  45                 list_del(&obj->st_link);
  46                 i915_gem_object_put(obj);
  47         }
  48
  49         cond_resched();
  50
  51         i915_gem_drain_freed_objects(i915);
  52 }
  53
  54 static int igt_mock_fill(void *arg)
  55 {
  56         struct intel_memory_region *mem = arg;
  57         resource_size_t total = resource_size(&mem->region);
  58         resource_size_t page_size;
  59         resource_size_t rem;
  60         unsigned long max_pages;
  61         unsigned long page_num;
  62         LIST_HEAD(objects);
  63         int err = 0;
  64
  65         page_size = PAGE_SIZE;
  66         max_pages = div64_u64(total, page_size);
  67         rem = total;
  68
  69         for_each_prime_number_from(page_num, 1, max_pages) {
  70                 resource_size_t size = page_num * page_size;
  71                 struct drm_i915_gem_object *obj;
  72
  73                 obj = i915_gem_object_create_region(mem, size, 0, 0);
  74                 if (IS_ERR(obj)) {
  75                         err = PTR_ERR(obj);
  76                         break;
  77                 }
  78
  79                 err = i915_gem_object_pin_pages_unlocked(obj);
  80                 if (err) {
  81                         i915_gem_object_put(obj);
  82                         break;
  83                 }
  84
  85                 list_add(&obj->st_link, &objects);
  86                 rem -= size;
  87         }
  88
  89         if (err == -ENOMEM)
  90                 err = 0;
  91         if (err == -ENXIO) {
  92                 if (page_num * page_size <= rem) {
  93                         pr_err("%s failed, space still left in region\n",
  94                                __func__);
  95                         err = -EINVAL;
  96                 } else {
  97                         err = 0;
  98                 }
  99         }
 100
 101         close_objects(mem, &objects);
 102
 103         return err;
 104 }
 105
 106 static struct drm_i915_gem_object *
 107 igt_object_create(struct intel_memory_region *mem,
 108                   struct list_head *objects,
 109                   u64 size,
 110                   unsigned int flags)
 111 {
 112         struct drm_i915_gem_object *obj;
 113         int err;
 114
 115         obj = i915_gem_object_create_region(mem, size, 0, flags);
 116         if (IS_ERR(obj))
 117                 return obj;
 118
 119         err = i915_gem_object_pin_pages_unlocked(obj);
 120         if (err)
 121                 goto put;
 122
 123         list_add(&obj->st_link, objects);
 124         return obj;
 125
 126 put:
 127         i915_gem_object_put(obj);
 128         return ERR_PTR(err);
 129 }
 130
 131 static void igt_object_release(struct drm_i915_gem_object *obj)
 132 {
 133         i915_gem_object_lock(obj, NULL);
 134         i915_gem_object_unpin_pages(obj);
 135         __i915_gem_object_put_pages(obj);
 136         i915_gem_object_unlock(obj);
 137         list_del(&obj->st_link);
 138         i915_gem_object_put(obj);
 139 }
 140
 141 static bool is_contiguous(struct drm_i915_gem_object *obj)
 142 {
 143         struct scatterlist *sg;
 144         dma_addr_t addr = -1;
 145
 146         for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
 147                 if (addr != -1 && sg_dma_address(sg) != addr)
 148                         return false;
 149
 150                 addr = sg_dma_address(sg) + sg_dma_len(sg);
 151         }
 152
 153         return true;
 154 }
 155
 156 static int igt_mock_reserve(void *arg)
 157 {
 158         struct intel_memory_region *mem = arg;
 159         struct drm_i915_private *i915 = mem->i915;
 160         resource_size_t avail = resource_size(&mem->region);
 161         struct drm_i915_gem_object *obj;
 162         const u32 chunk_size = SZ_32M;
 163         u32 i, offset, count, *order;
 164         u64 allocated, cur_avail;
 165         I915_RND_STATE(prng);
 166         LIST_HEAD(objects);
 167         int err = 0;
 168
 169         count = avail / chunk_size;
 170         order = i915_random_order(count, &prng);
 171         if (!order)
 172                 return 0;
 173
 174         mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
 175         if (IS_ERR(mem)) {
 176                 pr_err("failed to create memory region\n");
 177                 err = PTR_ERR(mem);
 178                 goto out_free_order;
 179         }
 180
 181         /* Reserve a bunch of ranges within the region */
 182         for (i = 0; i < count; ++i) {
 183                 u64 start = order[i] * chunk_size;
 184                 u64 size = i915_prandom_u32_max_state(chunk_size, &prng);
 185
 186                 /* Allow for some really big holes */
 187                 if (!size)
 188                         continue;
 189
 190                 size = round_up(size, PAGE_SIZE);
 191                 offset = igt_random_offset(&prng, 0, chunk_size, size,
 192                                            PAGE_SIZE);
 193
 194                 err = intel_memory_region_reserve(mem, start + offset, size);
 195                 if (err) {
 196                         pr_err("%s failed to reserve range", __func__);
 197                         goto out_close;
 198                 }
 199
 200                 /* XXX: maybe sanity check the block range here? */
 201                 avail -= size;
 202         }
 203
 204         /* Try to see if we can allocate from the remaining space */
 205         allocated = 0;
 206         cur_avail = avail;
 207         do {
 208                 u32 size = i915_prandom_u32_max_state(cur_avail, &prng);
 209
 210                 size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE);
 211                 obj = igt_object_create(mem, &objects, size, 0);
 212                 if (IS_ERR(obj)) {
 213                         if (PTR_ERR(obj) == -ENXIO)
 214                                 break;
 215
 216                         err = PTR_ERR(obj);
 217                         goto out_close;
 218                 }
 219                 cur_avail -= size;
 220                 allocated += size;
 221         } while (1);
 222
 223         if (allocated != avail) {
 224                 pr_err("%s mismatch between allocation and free space", __func__);
 225                 err = -EINVAL;
 226         }
 227
 228 out_close:
 229         close_objects(mem, &objects);
 230         intel_memory_region_destroy(mem);
 231 out_free_order:
 232         kfree(order);
 233         return err;
 234 }
 235
 236 static int igt_mock_contiguous(void *arg)
 237 {
 238         struct intel_memory_region *mem = arg;
 239         struct drm_i915_gem_object *obj;
 240         unsigned long n_objects;
 241         LIST_HEAD(objects);
 242         LIST_HEAD(holes);
 243         I915_RND_STATE(prng);
 244         resource_size_t total;
 245         resource_size_t min;
 246         u64 target;
 247         int err = 0;
 248
 249         total = resource_size(&mem->region);
 250
 251         /* Min size */
 252         obj = igt_object_create(mem, &objects, PAGE_SIZE,
 253                                 I915_BO_ALLOC_CONTIGUOUS);
 254         if (IS_ERR(obj))
 255                 return PTR_ERR(obj);
 256
 257         if (!is_contiguous(obj)) {
 258                 pr_err("%s min object spans disjoint sg entries\n", __func__);
 259                 err = -EINVAL;
 260                 goto err_close_objects;
 261         }
 262
 263         igt_object_release(obj);
 264
 265         /* Max size */
 266         obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
 267         if (IS_ERR(obj))
 268                 return PTR_ERR(obj);
 269
 270         if (!is_contiguous(obj)) {
 271                 pr_err("%s max object spans disjoint sg entries\n", __func__);
 272                 err = -EINVAL;
 273                 goto err_close_objects;
 274         }
 275
 276         igt_object_release(obj);
 277
 278         /* Internal fragmentation should not bleed into the object size */
 279         target = i915_prandom_u64_state(&prng);
 280         div64_u64_rem(target, total, &target);
 281         target = round_up(target, PAGE_SIZE);
 282         target = max_t(u64, PAGE_SIZE, target);
 283
 284         obj = igt_object_create(mem, &objects, target,
 285                                 I915_BO_ALLOC_CONTIGUOUS);
 286         if (IS_ERR(obj))
 287                 return PTR_ERR(obj);
 288
 289         if (obj->base.size != target) {
 290                 pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
 291                        obj->base.size, target);
 292                 err = -EINVAL;
 293                 goto err_close_objects;
 294         }
 295
 296         if (!is_contiguous(obj)) {
 297                 pr_err("%s object spans disjoint sg entries\n", __func__);
 298                 err = -EINVAL;
 299                 goto err_close_objects;
 300         }
 301
 302         igt_object_release(obj);
 303
 304         /*
 305          * Try to fragment the address space, such that half of it is free, but
 306          * the max contiguous block size is SZ_64K.
 307          */
 308
 309         target = SZ_64K;
 310         n_objects = div64_u64(total, target);
 311
 312         while (n_objects--) {
 313                 struct list_head *list;
 314
 315                 if (n_objects % 2)
 316                         list = &holes;
 317                 else
 318                         list = &objects;
 319
 320                 obj = igt_object_create(mem, list, target,
 321                                         I915_BO_ALLOC_CONTIGUOUS);
 322                 if (IS_ERR(obj)) {
 323                         err = PTR_ERR(obj);
 324                         goto err_close_objects;
 325                 }
 326         }
 327
 328         close_objects(mem, &holes);
 329
 330         min = target;
 331         target = total >> 1;
 332
 333         /* Make sure we can still allocate all the fragmented space */
 334         obj = igt_object_create(mem, &objects, target, 0);
 335         if (IS_ERR(obj)) {
 336                 err = PTR_ERR(obj);
 337                 goto err_close_objects;
 338         }
 339
 340         igt_object_release(obj);
 341
 342         /*
 343          * Even though we have enough free space, we don't have a big enough
 344          * contiguous block. Make sure that holds true.
 345          */
 346
 347         do {
 348                 bool should_fail = target > min;
 349
 350                 obj = igt_object_create(mem, &objects, target,
 351                                         I915_BO_ALLOC_CONTIGUOUS);
 352                 if (should_fail != IS_ERR(obj)) {
 353                         pr_err("%s target allocation(%llx) mismatch\n",
 354                                __func__, target);
 355                         err = -EINVAL;
 356                         goto err_close_objects;
 357                 }
 358
 359                 target >>= 1;
 360         } while (target >= PAGE_SIZE);
 361
 362 err_close_objects:
 363         list_splice_tail(&holes, &objects);
 364         close_objects(mem, &objects);
 365         return err;
 366 }
 367
 368 static int igt_mock_splintered_region(void *arg)
 369 {
 370         struct intel_memory_region *mem = arg;
 371         struct drm_i915_private *i915 = mem->i915;
 372         struct i915_ttm_buddy_resource *res;
 373         struct drm_i915_gem_object *obj;
 374         struct drm_buddy *mm;
 375         unsigned int expected_order;
 376         LIST_HEAD(objects);
 377         u64 size;
 378         int err = 0;
 379
 380         /*
 381          * Sanity check we can still allocate everything even if the
 382          * mm.max_order != mm.size. i.e our starting address space size is not a
 383          * power-of-two.
 384          */
 385
 386         size = (SZ_4G - 1) & PAGE_MASK;
 387         mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
 388         if (IS_ERR(mem))
 389                 return PTR_ERR(mem);
 390
 391         obj = igt_object_create(mem, &objects, size, 0);
 392         if (IS_ERR(obj)) {
 393                 err = PTR_ERR(obj);
 394                 goto out_close;
 395         }
 396
 397         res = to_ttm_buddy_resource(obj->mm.res);
 398         mm = res->mm;
 399         if (mm->size != size) {
 400                 pr_err("%s size mismatch(%llu != %llu)\n",
 401                        __func__, mm->size, size);
 402                 err = -EINVAL;
 403                 goto out_put;
 404         }
 405
 406         expected_order = get_order(rounddown_pow_of_two(size));
 407         if (mm->max_order != expected_order) {
 408                 pr_err("%s order mismatch(%u != %u)\n",
 409                        __func__, mm->max_order, expected_order);
 410                 err = -EINVAL;
 411                 goto out_put;
 412         }
 413
 414         close_objects(mem, &objects);
 415
 416         /*
 417          * While we should be able allocate everything without any flag
 418          * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are
 419          * actually limited to the largest power-of-two for the region size i.e
 420          * max_order, due to the inner workings of the buddy allocator. So make
 421          * sure that does indeed hold true.
 422          */
 423
 424         obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS);
 425         if (!IS_ERR(obj)) {
 426                 pr_err("%s too large contiguous allocation was not rejected\n",
 427                        __func__);
 428                 err = -EINVAL;
 429                 goto out_close;
 430         }
 431
 432         obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size),
 433                                 I915_BO_ALLOC_CONTIGUOUS);
 434         if (IS_ERR(obj)) {
 435                 pr_err("%s largest possible contiguous allocation failed\n",
 436                        __func__);
 437                 err = PTR_ERR(obj);
 438                 goto out_close;
 439         }
 440
 441 out_close:
 442         close_objects(mem, &objects);
 443 out_put:
 444         intel_memory_region_destroy(mem);
 445         return err;
 446 }
 447
 448 #ifndef SZ_8G
 449 #define SZ_8G BIT_ULL(33)
 450 #endif
 451
 452 static int igt_mock_max_segment(void *arg)
 453 {
 454         struct intel_memory_region *mem = arg;
 455         struct drm_i915_private *i915 = mem->i915;
 456         struct i915_ttm_buddy_resource *res;
 457         struct drm_i915_gem_object *obj;
 458         struct drm_buddy_block *block;
 459         struct drm_buddy *mm;
 460         struct list_head *blocks;
 461         struct scatterlist *sg;
 462         I915_RND_STATE(prng);
 463         LIST_HEAD(objects);
 464         unsigned int max_segment;
 465         unsigned int ps;
 466         u64 size;
 467         int err = 0;
 468
 469         /*
 470          * While we may create very large contiguous blocks, we may need
 471          * to break those down for consumption elsewhere. In particular,
 472          * dma-mapping with scatterlist elements have an implicit limit of
 473          * UINT_MAX on each element.
 474          */
 475
 476         size = SZ_8G;
 477         ps = PAGE_SIZE;
 478         if (i915_prandom_u64_state(&prng) & 1)
 479                 ps = SZ_64K; /* For something like DG2 */
 480
 481         max_segment = round_down(UINT_MAX, ps);
 482
 483         mem = mock_region_create(i915, 0, size, ps, 0, 0);
 484         if (IS_ERR(mem))
 485                 return PTR_ERR(mem);
 486
 487         obj = igt_object_create(mem, &objects, size, 0);
 488         if (IS_ERR(obj)) {
 489                 err = PTR_ERR(obj);
 490                 goto out_put;
 491         }
 492
 493         res = to_ttm_buddy_resource(obj->mm.res);
 494         blocks = &res->blocks;
 495         mm = res->mm;
 496         size = 0;
 497         list_for_each_entry(block, blocks, link) {
 498                 if (drm_buddy_block_size(mm, block) > size)
 499                         size = drm_buddy_block_size(mm, block);
 500         }
 501         if (size < max_segment) {
 502                 pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n",
 503                        __func__, max_segment, size);
 504                 err = -EINVAL;
 505                 goto out_close;
 506         }
 507
 508         for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
 509                 dma_addr_t daddr = sg_dma_address(sg);
 510
 511                 if (sg->length > max_segment) {
 512                         pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
 513                                __func__, sg->length, max_segment);
 514                         err = -EINVAL;
 515                         goto out_close;
 516                 }
 517
 518                 if (!IS_ALIGNED(daddr, ps)) {
 519                         pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n",
 520                                __func__, &daddr, ps);
 521                         err = -EINVAL;
 522                         goto out_close;
 523                 }
 524         }
 525
 526 out_close:
 527         close_objects(mem, &objects);
 528 out_put:
 529         intel_memory_region_destroy(mem);
 530         return err;
 531 }
 532
 533 static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj)
 534 {
 535         struct intel_memory_region *mr = obj->mm.region;
 536         struct i915_ttm_buddy_resource *bman_res =
 537                 to_ttm_buddy_resource(obj->mm.res);
 538         struct drm_buddy *mm = bman_res->mm;
 539         struct drm_buddy_block *block;
 540         u64 total;
 541
 542         total = 0;
 543         list_for_each_entry(block, &bman_res->blocks, link) {
 544                 u64 start = drm_buddy_block_offset(block);
 545                 u64 end = start + drm_buddy_block_size(mm, block);
 546
 547                 if (start < resource_size(&mr->io))
 548                         total += min_t(u64, end, resource_size(&mr->io)) - start;
 549         }
 550
 551         return total;
 552 }
 553
 554 static int igt_mock_io_size(void *arg)
 555 {
 556         struct intel_memory_region *mr = arg;
 557         struct drm_i915_private *i915 = mr->i915;
 558         struct drm_i915_gem_object *obj;
 559         u64 mappable_theft_total;
 560         u64 io_size;
 561         u64 total;
 562         u64 ps;
 563         u64 rem;
 564         u64 size;
 565         I915_RND_STATE(prng);
 566         LIST_HEAD(objects);
 567         int err = 0;
 568
 569         ps = SZ_4K;
 570         if (i915_prandom_u64_state(&prng) & 1)
 571                 ps = SZ_64K; /* For something like DG2 */
 572
 573         div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total);
 574         total = round_down(total, ps);
 575         total = max_t(u64, total, SZ_1G);
 576
 577         div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size);
 578         io_size = round_down(io_size, ps);
 579         io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */
 580
 581         pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n",
 582                 __func__, ps, io_size, total);
 583
 584         mr = mock_region_create(i915, 0, total, ps, 0, io_size);
 585         if (IS_ERR(mr)) {
 586                 err = PTR_ERR(mr);
 587                 goto out_err;
 588         }
 589
 590         mappable_theft_total = 0;
 591         rem = total - io_size;
 592         do {
 593                 div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
 594                 size = round_down(size, ps);
 595                 size = max(size, ps);
 596
 597                 obj = igt_object_create(mr, &objects, size,
 598                                         I915_BO_ALLOC_GPU_ONLY);
 599                 if (IS_ERR(obj)) {
 600                         pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n",
 601                                __func__, rem, size);
 602                         err = PTR_ERR(obj);
 603                         goto out_close;
 604                 }
 605
 606                 mappable_theft_total += igt_object_mappable_total(obj);
 607                 rem -= size;
 608         } while (rem);
 609
 610         pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n",
 611                 __func__,
 612                 (u64)mappable_theft_total >> 20,
 613                 (u64)io_size >> 20,
 614                 (u64)total >> 20);
 615
 616         /*
 617          * Even if we allocate all of the non-mappable portion, we should still
 618          * be able to dip into the mappable portion.
 619          */
 620         obj = igt_object_create(mr, &objects, io_size,
 621                                 I915_BO_ALLOC_GPU_ONLY);
 622         if (IS_ERR(obj)) {
 623                 pr_err("%s allocation unexpectedly failed\n", __func__);
 624                 err = PTR_ERR(obj);
 625                 goto out_close;
 626         }
 627
 628         close_objects(mr, &objects);
 629
 630         rem = io_size;
 631         do {
 632                 div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
 633                 size = round_down(size, ps);
 634                 size = max(size, ps);
 635
 636                 obj = igt_object_create(mr, &objects, size, 0);
 637                 if (IS_ERR(obj)) {
 638                         pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n",
 639                                __func__, rem, size);
 640                         err = PTR_ERR(obj);
 641                         goto out_close;
 642                 }
 643
 644                 if (igt_object_mappable_total(obj) != size) {
 645                         pr_err("%s allocation is not mappable(size=%llx)\n",
 646                                __func__, size);
 647                         err = -EINVAL;
 648                         goto out_close;
 649                 }
 650                 rem -= size;
 651         } while (rem);
 652
 653         /*
 654          * We assume CPU access is required by default, which should result in a
 655          * failure here, even though the non-mappable portion is free.
 656          */
 657         obj = igt_object_create(mr, &objects, ps, 0);
 658         if (!IS_ERR(obj)) {
 659                 pr_err("%s allocation unexpectedly succeeded\n", __func__);
 660                 err = -EINVAL;
 661                 goto out_close;
 662         }
 663
 664 out_close:
 665         close_objects(mr, &objects);
 666         intel_memory_region_destroy(mr);
 667 out_err:
 668         if (err == -ENOMEM)
 669                 err = 0;
 670
 671         return err;
 672 }
 673
 674 static int igt_gpu_write_dw(struct intel_context *ce,
 675                             struct i915_vma *vma,
 676                             u32 dword,
 677                             u32 value)
 678 {
 679         return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
 680                                vma->size >> PAGE_SHIFT, value);
 681 }
 682
 683 static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 684 {
 685         unsigned long n = obj->base.size >> PAGE_SHIFT;
 686         u32 *ptr;
 687         int err;
 688
 689         err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
 690         if (err)
 691                 return err;
 692
 693         ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
 694         if (IS_ERR(ptr))
 695                 return PTR_ERR(ptr);
 696
 697         ptr += dword;
 698         while (n--) {
 699                 if (*ptr != val) {
 700                         pr_err("base[%u]=%08x, val=%08x\n",
 701                                dword, *ptr, val);
 702                         err = -EINVAL;
 703                         break;
 704                 }
 705
 706                 ptr += PAGE_SIZE / sizeof(*ptr);
 707         }
 708
 709         i915_gem_object_unpin_map(obj);
 710         return err;
 711 }
 712
 713 static int igt_gpu_write(struct i915_gem_context *ctx,
 714                          struct drm_i915_gem_object *obj)
 715 {
 716         struct i915_gem_engines *engines;
 717         struct i915_gem_engines_iter it;
 718         struct i915_address_space *vm;
 719         struct intel_context *ce;
 720         I915_RND_STATE(prng);
 721         IGT_TIMEOUT(end_time);
 722         unsigned int count;
 723         struct i915_vma *vma;
 724         int *order;
 725         int i, n;
 726         int err = 0;
 727
 728         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 729
 730         n = 0;
 731         count = 0;
 732         for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 733                 count++;
 734                 if (!intel_engine_can_store_dword(ce->engine))
 735                         continue;
 736
 737                 vm = ce->vm;
 738                 n++;
 739         }
 740         i915_gem_context_unlock_engines(ctx);
 741         if (!n)
 742                 return 0;
 743
 744         order = i915_random_order(count * count, &prng);
 745         if (!order)
 746                 return -ENOMEM;
 747
 748         vma = i915_vma_instance(obj, vm, NULL);
 749         if (IS_ERR(vma)) {
 750                 err = PTR_ERR(vma);
 751                 goto out_free;
 752         }
 753
 754         err = i915_vma_pin(vma, 0, 0, PIN_USER);
 755         if (err)
 756                 goto out_free;
 757
 758         i = 0;
 759         engines = i915_gem_context_lock_engines(ctx);
 760         do {
 761                 u32 rng = prandom_u32_state(&prng);
 762                 u32 dword = offset_in_page(rng) / 4;
 763
 764                 ce = engines->engines[order[i] % engines->num_engines];
 765                 i = (i + 1) % (count * count);
 766                 if (!ce || !intel_engine_can_store_dword(ce->engine))
 767                         continue;
 768
 769                 err = igt_gpu_write_dw(ce, vma, dword, rng);
 770                 if (err)
 771                         break;
 772
 773                 i915_gem_object_lock(obj, NULL);
 774                 err = igt_cpu_check(obj, dword, rng);
 775                 i915_gem_object_unlock(obj);
 776                 if (err)
 777                         break;
 778         } while (!__igt_timeout(end_time, NULL));
 779         i915_gem_context_unlock_engines(ctx);
 780
 781 out_free:
 782         kfree(order);
 783
 784         if (err == -ENOMEM)
 785                 err = 0;
 786
 787         return err;
 788 }
 789
 790 static int igt_lmem_create(void *arg)
 791 {
 792         struct drm_i915_private *i915 = arg;
 793         struct drm_i915_gem_object *obj;
 794         int err = 0;
 795
 796         obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
 797         if (IS_ERR(obj))
 798                 return PTR_ERR(obj);
 799
 800         err = i915_gem_object_pin_pages_unlocked(obj);
 801         if (err)
 802                 goto out_put;
 803
 804         i915_gem_object_unpin_pages(obj);
 805 out_put:
 806         i915_gem_object_put(obj);
 807
 808         return err;
 809 }
 810
 811 static int igt_lmem_create_with_ps(void *arg)
 812 {
 813         struct drm_i915_private *i915 = arg;
 814         int err = 0;
 815         u32 ps;
 816
 817         for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) {
 818                 struct drm_i915_gem_object *obj;
 819                 dma_addr_t daddr;
 820
 821                 obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0);
 822                 if (IS_ERR(obj)) {
 823                         err = PTR_ERR(obj);
 824                         if (err == -ENXIO || err == -E2BIG) {
 825                                 pr_info("%s not enough lmem for ps(%u) err=%d\n",
 826                                         __func__, ps, err);
 827                                 err = 0;
 828                         }
 829
 830                         break;
 831                 }
 832
 833                 if (obj->base.size != ps) {
 834                         pr_err("%s size(%zu) != ps(%u)\n",
 835                                __func__, obj->base.size, ps);
 836                         err = -EINVAL;
 837                         goto out_put;
 838                 }
 839
 840                 i915_gem_object_lock(obj, NULL);
 841                 err = i915_gem_object_pin_pages(obj);
 842                 if (err) {
 843                         if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
 844                                 pr_info("%s not enough lmem for ps(%u) err=%d\n",
 845                                         __func__, ps, err);
 846                                 err = 0;
 847                         }
 848                         goto out_put;
 849                 }
 850
 851                 daddr = i915_gem_object_get_dma_address(obj, 0);
 852                 if (!IS_ALIGNED(daddr, ps)) {
 853                         pr_err("%s daddr(%pa) not aligned with ps(%u)\n",
 854                                __func__, &daddr, ps);
 855                         err = -EINVAL;
 856                         goto out_unpin;
 857                 }
 858
 859 out_unpin:
 860                 i915_gem_object_unpin_pages(obj);
 861                 __i915_gem_object_put_pages(obj);
 862 out_put:
 863                 i915_gem_object_unlock(obj);
 864                 i915_gem_object_put(obj);
 865
 866                 if (err)
 867                         break;
 868         }
 869
 870         return err;
 871 }
 872
 873 static int igt_lmem_create_cleared_cpu(void *arg)
 874 {
 875         struct drm_i915_private *i915 = arg;
 876         I915_RND_STATE(prng);
 877         IGT_TIMEOUT(end_time);
 878         u32 size, i;
 879         int err;
 880
 881         i915_gem_drain_freed_objects(i915);
 882
 883         size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng));
 884         size = round_up(size, PAGE_SIZE);
 885         i = 0;
 886
 887         do {
 888                 struct drm_i915_gem_object *obj;
 889                 unsigned int flags;
 890                 u32 dword, val;
 891                 void *vaddr;
 892
 893                 /*
 894                  * Alternate between cleared and uncleared allocations, while
 895                  * also dirtying the pages each time to check that the pages are
 896                  * always cleared if requested, since we should get some overlap
 897                  * of the underlying pages, if not all, since we are the only
 898                  * user.
 899                  */
 900
 901                 flags = I915_BO_ALLOC_CPU_CLEAR;
 902                 if (i & 1)
 903                         flags = 0;
 904
 905                 obj = i915_gem_object_create_lmem(i915, size, flags);
 906                 if (IS_ERR(obj))
 907                         return PTR_ERR(obj);
 908
 909                 i915_gem_object_lock(obj, NULL);
 910                 err = i915_gem_object_pin_pages(obj);
 911                 if (err)
 912                         goto out_put;
 913
 914                 dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32),
 915                                                    &prng);
 916
 917                 if (flags & I915_BO_ALLOC_CPU_CLEAR) {
 918                         err = igt_cpu_check(obj, dword, 0);
 919                         if (err) {
 920                                 pr_err("%s failed with size=%u, flags=%u\n",
 921                                        __func__, size, flags);
 922                                 goto out_unpin;
 923                         }
 924                 }
 925
 926                 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
 927                 if (IS_ERR(vaddr)) {
 928                         err = PTR_ERR(vaddr);
 929                         goto out_unpin;
 930                 }
 931
 932                 val = prandom_u32_state(&prng);
 933
 934                 memset32(vaddr, val, obj->base.size / sizeof(u32));
 935
 936                 i915_gem_object_flush_map(obj);
 937                 i915_gem_object_unpin_map(obj);
 938 out_unpin:
 939                 i915_gem_object_unpin_pages(obj);
 940                 __i915_gem_object_put_pages(obj);
 941 out_put:
 942                 i915_gem_object_unlock(obj);
 943                 i915_gem_object_put(obj);
 944
 945                 if (err)
 946                         break;
 947                 ++i;
 948         } while (!__igt_timeout(end_time, NULL));
 949
 950         pr_info("%s completed (%u) iterations\n", __func__, i);
 951
 952         return err;
 953 }
 954
 955 static int igt_lmem_write_gpu(void *arg)
 956 {
 957         struct drm_i915_private *i915 = arg;
 958         struct drm_i915_gem_object *obj;
 959         struct i915_gem_context *ctx;
 960         struct file *file;
 961         I915_RND_STATE(prng);
 962         u32 sz;
 963         int err;
 964
 965         file = mock_file(i915);
 966         if (IS_ERR(file))
 967                 return PTR_ERR(file);
 968
 969         ctx = live_context(i915, file);
 970         if (IS_ERR(ctx)) {
 971                 err = PTR_ERR(ctx);
 972                 goto out_file;
 973         }
 974
 975         sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
 976
 977         obj = i915_gem_object_create_lmem(i915, sz, 0);
 978         if (IS_ERR(obj)) {
 979                 err = PTR_ERR(obj);
 980                 goto out_file;
 981         }
 982
 983         err = i915_gem_object_pin_pages_unlocked(obj);
 984         if (err)
 985                 goto out_put;
 986
 987         err = igt_gpu_write(ctx, obj);
 988         if (err)
 989                 pr_err("igt_gpu_write failed(%d)\n", err);
 990
 991         i915_gem_object_unpin_pages(obj);
 992 out_put:
 993         i915_gem_object_put(obj);
 994 out_file:
 995         fput(file);
 996         return err;
 997 }
 998
 999 static struct intel_engine_cs *
1000 random_engine_class(struct drm_i915_private *i915,
1001                     unsigned int class,
1002                     struct rnd_state *prng)
1003 {
1004         struct intel_engine_cs *engine;
1005         unsigned int count;
1006
1007         count = 0;
1008         for (engine = intel_engine_lookup_user(i915, class, 0);
1009              engine && engine->uabi_class == class;
1010              engine = rb_entry_safe(rb_next(&engine->uabi_node),
1011                                     typeof(*engine), uabi_node))
1012                 count++;
1013
1014         count = i915_prandom_u32_max_state(count, prng);
1015         return intel_engine_lookup_user(i915, class, count);
1016 }
1017
1018 static int igt_lmem_write_cpu(void *arg)
1019 {
1020         struct drm_i915_private *i915 = arg;
1021         struct drm_i915_gem_object *obj;
1022         I915_RND_STATE(prng);
1023         IGT_TIMEOUT(end_time);
1024         u32 bytes[] = {
1025                 0, /* rng placeholder */
1026                 sizeof(u32),
1027                 sizeof(u64),
1028                 64, /* cl */
1029                 PAGE_SIZE,
1030                 PAGE_SIZE - sizeof(u32),
1031                 PAGE_SIZE - sizeof(u64),
1032                 PAGE_SIZE - 64,
1033         };
1034         struct intel_engine_cs *engine;
1035         struct i915_request *rq;
1036         u32 *vaddr;
1037         u32 sz;
1038         u32 i;
1039         int *order;
1040         int count;
1041         int err;
1042
1043         engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng);
1044         if (!engine)
1045                 return 0;
1046
1047         pr_info("%s: using %s\n", __func__, engine->name);
1048
1049         sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
1050         sz = max_t(u32, 2 * PAGE_SIZE, sz);
1051
1052         obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS);
1053         if (IS_ERR(obj))
1054                 return PTR_ERR(obj);
1055
1056         vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1057         if (IS_ERR(vaddr)) {
1058                 err = PTR_ERR(vaddr);
1059                 goto out_put;
1060         }
1061
1062         i915_gem_object_lock(obj, NULL);
1063
1064         err = dma_resv_reserve_fences(obj->base.resv, 1);
1065         if (err) {
1066                 i915_gem_object_unlock(obj);
1067                 goto out_put;
1068         }
1069
1070         /* Put the pages into a known state -- from the gpu for added fun */
1071         intel_engine_pm_get(engine);
1072         err = intel_context_migrate_clear(engine->gt->migrate.context, NULL,
1073                                           obj->mm.pages->sgl,
1074                                           i915_gem_get_pat_index(i915,
1075                                                                  I915_CACHE_NONE),
1076                                           true, 0xdeadbeaf, &rq);
1077         if (rq) {
1078                 dma_resv_add_fence(obj->base.resv, &rq->fence,
1079                                    DMA_RESV_USAGE_WRITE);
1080                 i915_request_put(rq);
1081         }
1082
1083         intel_engine_pm_put(engine);
1084         if (!err)
1085                 err = i915_gem_object_set_to_wc_domain(obj, true);
1086         i915_gem_object_unlock(obj);
1087         if (err)
1088                 goto out_unpin;
1089
1090         count = ARRAY_SIZE(bytes);
1091         order = i915_random_order(count * count, &prng);
1092         if (!order) {
1093                 err = -ENOMEM;
1094                 goto out_unpin;
1095         }
1096
1097         /* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
1098         bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
1099         GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));
1100
1101         i = 0;
1102         do {
1103                 u32 offset;
1104                 u32 align;
1105                 u32 dword;
1106                 u32 size;
1107                 u32 val;
1108
1109                 size = bytes[order[i] % count];
1110                 i = (i + 1) % (count * count);
1111
1112                 align = bytes[order[i] % count];
1113                 i = (i + 1) % (count * count);
1114
1115                 align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align));
1116
1117                 offset = igt_random_offset(&prng, 0, obj->base.size,
1118                                            size, align);
1119
1120                 val = prandom_u32_state(&prng);
1121                 memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf,
1122                          size / sizeof(u32));
1123
1124                 /*
1125                  * Sample random dw -- don't waste precious time reading every
1126                  * single dw.
1127                  */
1128                 dword = igt_random_offset(&prng, offset,
1129                                           offset + size,
1130                                           sizeof(u32), sizeof(u32));
1131                 dword /= sizeof(u32);
1132                 if (vaddr[dword] != (val ^ 0xdeadbeaf)) {
1133                         pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n",
1134                                __func__, dword, vaddr[dword], val ^ 0xdeadbeaf,
1135                                size, align, offset);
1136                         err = -EINVAL;
1137                         break;
1138                 }
1139         } while (!__igt_timeout(end_time, NULL));
1140
1141 out_unpin:
1142         i915_gem_object_unpin_map(obj);
1143 out_put:
1144         i915_gem_object_put(obj);
1145
1146         return err;
1147 }
1148
1149 static const char *repr_type(u32 type)
1150 {
1151         switch (type) {
1152         case I915_MAP_WB:
1153                 return "WB";
1154         case I915_MAP_WC:
1155                 return "WC";
1156         }
1157
1158         return "";
1159 }
1160
1161 static struct drm_i915_gem_object *
1162 create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
1163                           void **out_addr)
1164 {
1165         struct drm_i915_gem_object *obj;
1166         void *addr;
1167
1168         obj = i915_gem_object_create_region(mr, size, 0, 0);
1169         if (IS_ERR(obj)) {
1170                 if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
1171                         return ERR_PTR(-ENODEV);
1172                 return obj;
1173         }
1174
1175         addr = i915_gem_object_pin_map_unlocked(obj, type);
1176         if (IS_ERR(addr)) {
1177                 i915_gem_object_put(obj);
1178                 if (PTR_ERR(addr) == -ENXIO)
1179                         return ERR_PTR(-ENODEV);
1180                 return addr;
1181         }
1182
1183         *out_addr = addr;
1184         return obj;
1185 }
1186
1187 static int wrap_ktime_compare(const void *A, const void *B)
1188 {
1189         const ktime_t *a = A, *b = B;
1190
1191         return ktime_compare(*a, *b);
1192 }
1193
/*
 * Copy @size bytes from @src to @dst one unsigned long at a time.
 *
 * Only whole words are copied: any trailing bytes beyond the last full
 * unsigned long are left untouched in @dst.
 */
static void igt_memcpy_long(void *dst, const void *src, size_t size)
{
        const size_t nwords = size / sizeof(unsigned long);
        const unsigned long *in = src;
        unsigned long *out = dst;
        size_t n;

        for (n = 0; n < nwords; n++)
                out[n] = in[n];
}
1203
/*
 * Plain memcpy() behind the copy-callback signature used by the perf test
 * table, so it can be timed next to the other copy routines.
 */
static inline void igt_memcpy(void *dst, const void *src, size_t size)
{
        (void)memcpy(dst, src, size);
}
1208
/*
 * i915_memcpy_from_wc() behind the copy-callback signature used by the perf
 * test table. Any result of the underlying call is deliberately not
 * reported; the table entry is skipped up front when
 * i915_has_memcpy_from_wc() says the routine is unavailable.
 */
static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
{
        (void)i915_memcpy_from_wc(dst, src, size);
}
1213
1214 static int _perf_memcpy(struct intel_memory_region *src_mr,
1215                         struct intel_memory_region *dst_mr,
1216                         u64 size, u32 src_type, u32 dst_type)
1217 {
1218         struct drm_i915_private *i915 = src_mr->i915;
1219         const struct {
1220                 const char *name;
1221                 void (*copy)(void *dst, const void *src, size_t size);
1222                 bool skip;
1223         } tests[] = {
1224                 {
1225                         "memcpy",
1226                         igt_memcpy,
1227                 },
1228                 {
1229                         "memcpy_long",
1230                         igt_memcpy_long,
1231                 },
1232                 {
1233                         "memcpy_from_wc",
1234                         igt_memcpy_from_wc,
1235                         !i915_has_memcpy_from_wc(),
1236                 },
1237         };
1238         struct drm_i915_gem_object *src, *dst;
1239         void *src_addr, *dst_addr;
1240         int ret = 0;
1241         int i;
1242
1243         src = create_region_for_mapping(src_mr, size, src_type, &src_addr);
1244         if (IS_ERR(src)) {
1245                 ret = PTR_ERR(src);
1246                 goto out;
1247         }
1248
1249         dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr);
1250         if (IS_ERR(dst)) {
1251                 ret = PTR_ERR(dst);
1252                 goto out_unpin_src;
1253         }
1254
1255         for (i = 0; i < ARRAY_SIZE(tests); ++i) {
1256                 ktime_t t[5];
1257                 int pass;
1258
1259                 if (tests[i].skip)
1260                         continue;
1261
1262                 for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
1263                         ktime_t t0, t1;
1264
1265                         t0 = ktime_get();
1266
1267                         tests[i].copy(dst_addr, src_addr, size);
1268
1269                         t1 = ktime_get();
1270                         t[pass] = ktime_sub(t1, t0);
1271                 }
1272
1273                 sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
1274                 if (t[0] <= 0) {
1275                         /* ignore the impossible to protect our sanity */
1276                         pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n",
1277                                  __func__,
1278                                  src_mr->name, repr_type(src_type),
1279                                  dst_mr->name, repr_type(dst_type),
1280                                  tests[i].name, size >> 10,
1281                                  t[0], t[4]);
1282                         continue;
1283                 }
1284
1285                 pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n",
1286                         __func__,
1287                         src_mr->name, repr_type(src_type),
1288                         dst_mr->name, repr_type(dst_type),
1289                         tests[i].name, size >> 10,
1290                         div64_u64(mul_u32_u32(4 * size,
1291                                               1000 * 1000 * 1000),
1292                                   t[1] + 2 * t[2] + t[3]) >> 20);
1293
1294                 cond_resched();
1295         }
1296
1297         i915_gem_object_unpin_map(dst);
1298         i915_gem_object_put(dst);
1299 out_unpin_src:
1300         i915_gem_object_unpin_map(src);
1301         i915_gem_object_put(src);
1302
1303         i915_gem_drain_freed_objects(i915);
1304 out:
1305         if (ret == -ENODEV)
1306                 ret = 0;
1307
1308         return ret;
1309 }
1310
1311 static int perf_memcpy(void *arg)
1312 {
1313         struct drm_i915_private *i915 = arg;
1314         static const u32 types[] = {
1315                 I915_MAP_WB,
1316                 I915_MAP_WC,
1317         };
1318         static const u32 sizes[] = {
1319                 SZ_4K,
1320                 SZ_64K,
1321                 SZ_4M,
1322         };
1323         struct intel_memory_region *src_mr, *dst_mr;
1324         int src_id, dst_id;
1325         int i, j, k;
1326         int ret;
1327
1328         for_each_memory_region(src_mr, i915, src_id) {
1329                 for_each_memory_region(dst_mr, i915, dst_id) {
1330                         for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
1331                                 for (j = 0; j < ARRAY_SIZE(types); ++j) {
1332                                         for (k = 0; k < ARRAY_SIZE(types); ++k) {
1333                                                 ret = _perf_memcpy(src_mr,
1334                                                                    dst_mr,
1335                                                                    sizes[i],
1336                                                                    types[j],
1337                                                                    types[k]);
1338                                                 if (ret)
1339                                                         return ret;
1340                                         }
1341                                 }
1342                         }
1343                 }
1344         }
1345
1346         return 0;
1347 }
1348
1349 int intel_memory_region_mock_selftests(void)
1350 {
1351         static const struct i915_subtest tests[] = {
1352                 SUBTEST(igt_mock_reserve),
1353                 SUBTEST(igt_mock_fill),
1354                 SUBTEST(igt_mock_contiguous),
1355                 SUBTEST(igt_mock_splintered_region),
1356                 SUBTEST(igt_mock_max_segment),
1357                 SUBTEST(igt_mock_io_size),
1358         };
1359         struct intel_memory_region *mem;
1360         struct drm_i915_private *i915;
1361         int err;
1362
1363         i915 = mock_gem_device();
1364         if (!i915)
1365                 return -ENOMEM;
1366
1367         mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
1368         if (IS_ERR(mem)) {
1369                 pr_err("failed to create memory region\n");
1370                 err = PTR_ERR(mem);
1371                 goto out_unref;
1372         }
1373
1374         err = i915_subtests(tests, mem);
1375
1376         intel_memory_region_destroy(mem);
1377 out_unref:
1378         mock_destroy_device(i915);
1379         return err;
1380 }
1381
1382 int intel_memory_region_live_selftests(struct drm_i915_private *i915)
1383 {
1384         static const struct i915_subtest tests[] = {
1385                 SUBTEST(igt_lmem_create),
1386                 SUBTEST(igt_lmem_create_with_ps),
1387                 SUBTEST(igt_lmem_create_cleared_cpu),
1388                 SUBTEST(igt_lmem_write_cpu),
1389                 SUBTEST(igt_lmem_write_gpu),
1390         };
1391
1392         if (!HAS_LMEM(i915)) {
1393                 pr_info("device lacks LMEM support, skipping\n");
1394                 return 0;
1395         }
1396
1397         if (intel_gt_is_wedged(to_gt(i915)))
1398                 return 0;
1399
1400         return i915_live_subtests(tests, i915);
1401 }
1402
1403 int intel_memory_region_perf_selftests(struct drm_i915_private *i915)
1404 {
1405         static const struct i915_subtest tests[] = {
1406                 SUBTEST(perf_memcpy),
1407         };
1408
1409         if (intel_gt_is_wedged(to_gt(i915)))
1410                 return 0;
1411
1412         return i915_live_subtests(tests, i915);
1413 }