drivers/gpu/drm/i915/gt/selftest_tlb.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2022 Intel Corporation
   4  */
   5
   6 #include "i915_selftest.h"
   7
   8 #include "gem/i915_gem_internal.h"
   9 #include "gem/i915_gem_lmem.h"
  10 #include "gem/i915_gem_region.h"
  11
  12 #include "gen8_engine_cs.h"
  13 #include "i915_gem_ww.h"
  14 #include "intel_engine_regs.h"
  15 #include "intel_gpu_commands.h"
  16 #include "intel_context.h"
  17 #include "intel_gt.h"
  18 #include "intel_ring.h"
  19
  20 #include "selftests/igt_flush_test.h"
  21 #include "selftests/i915_random.h"
  22
  23 static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
  24 {
  25         GEM_BUG_ON(addr < i915_vma_offset(vma));
  26         GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val));
  27         memset64(page_mask_bits(vma->obj->mm.mapping) +
  28                  (addr - i915_vma_offset(vma)), val, 1);
  29 }
  30
  31 static int
  32 pte_tlbinv(struct intel_context *ce,
  33            struct i915_vma *va,
  34            struct i915_vma *vb,
  35            u64 align,
  36            void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
  37            u64 length,
  38            struct rnd_state *prng)
  39 {
  40         const unsigned int pat_index =
  41                 i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
  42         struct drm_i915_gem_object *batch;
  43         struct drm_mm_node vb_node;
  44         struct i915_request *rq;
  45         struct i915_vma *vma;
  46         u64 addr;
  47         int err;
  48         u32 *cs;
  49
  50         batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
  51         if (IS_ERR(batch))
  52                 return PTR_ERR(batch);
  53
  54         vma = i915_vma_instance(batch, ce->vm, NULL);
  55         if (IS_ERR(vma)) {
  56                 err = PTR_ERR(vma);
  57                 goto out;
  58         }
  59
  60         err = i915_vma_pin(vma, 0, 0, PIN_USER);
  61         if (err)
  62                 goto out;
  63
  64         /* Pin va at random but aligned offset after vma */
  65         addr = round_up(vma->node.start + vma->node.size, align);
  66         /* MI_CONDITIONAL_BATCH_BUFFER_END limits address to 48b */
  67         addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
  68                                  va->size, align);
  69         err = i915_vma_pin(va,  0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
  70         if (err) {
  71                 pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
  72                 goto out;
  73         }
  74         GEM_BUG_ON(i915_vma_offset(va) != addr);
  75         if (vb != va) {
  76                 vb_node = vb->node;
  77                 vb->node = va->node; /* overwrites the _same_ PTE  */
  78         }
  79
  80         /*
  81          * Now choose random dword at the 1st pinned page.
  82          *
  83          * SZ_64K pages on dg1 require that the whole PT be marked
  84          * containing 64KiB entries. So we make sure that vma
  85          * covers the whole PT, despite being randomly aligned to 64KiB
  86          * and restrict our sampling to the 2MiB PT within where
  87          * we know that we will be using 64KiB pages.
  88          */
  89         if (align == SZ_64K)
  90                 addr = round_up(addr, SZ_2M);
  91         addr = igt_random_offset(prng, addr, addr + align, 8, 8);
  92
  93         if (va != vb)
  94                 pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
  95                         ce->engine->name, va->obj->mm.region->name ?: "smem",
  96                         addr, align, va->resource->page_sizes_gtt,
  97                         va->page_sizes.phys, va->page_sizes.sg,
  98                         addr & -length, length);
  99
 100         cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
 101         *cs++ = MI_NOOP; /* for later termination */
 102         /*
 103          * Sample the target to see if we spot the updated backing store.
 104          * Gen8 VCS compares immediate value with bitwise-and of two
 105          * consecutive DWORDS pointed by addr, other gen/engines compare value
 106          * with DWORD pointed by addr. Moreover we want to exercise DWORD size
 107          * invalidations. To fulfill all these requirements below values
 108          * have been chosen.
 109          */
 110         *cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
 111         *cs++ = 0; /* break if *addr == 0 */
 112         *cs++ = lower_32_bits(addr);
 113         *cs++ = upper_32_bits(addr);
 114         vma_set_qw(va, addr, -1);
 115         vma_set_qw(vb, addr, 0);
 116
 117         /* Keep sampling until we get bored */
 118         *cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
 119         *cs++ = lower_32_bits(i915_vma_offset(vma));
 120         *cs++ = upper_32_bits(i915_vma_offset(vma));
 121
 122         i915_gem_object_flush_map(batch);
 123
 124         rq = i915_request_create(ce);
 125         if (IS_ERR(rq)) {
 126                 err = PTR_ERR(rq);
 127                 goto out_va;
 128         }
 129
 130         err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
 131         if (err) {
 132                 i915_request_add(rq);
 133                 goto out_va;
 134         }
 135
 136         i915_request_get(rq);
 137         i915_request_add(rq);
 138
 139         /*
 140          * Short sleep to sanitycheck the batch is spinning before we begin.
 141          * FIXME: Why is GSC so slow?
 142          */
 143         if (ce->engine->class == OTHER_CLASS)
 144                 msleep(200);
 145         else
 146                 msleep(10);
 147
 148         if (va == vb) {
 149                 if (!i915_request_completed(rq)) {
 150                         pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
 151                                ce->engine->name, va->obj->mm.region->name ?: "smem",
 152                                addr, align, va->resource->page_sizes_gtt,
 153                                va->page_sizes.phys, va->page_sizes.sg);
 154                         err = -EIO;
 155                 }
 156         } else if (!i915_request_completed(rq)) {
 157                 struct i915_vma_resource vb_res = {
 158                         .bi.pages = vb->obj->mm.pages,
 159                         .bi.page_sizes = vb->obj->mm.page_sizes,
 160                         .start = i915_vma_offset(vb),
 161                         .vma_size = i915_vma_size(vb)
 162                 };
 163                 unsigned int pte_flags = 0;
 164
 165                 /* Flip the PTE between A and B */
 166                 if (i915_gem_object_is_lmem(vb->obj))
 167                         pte_flags |= PTE_LM;
 168                 ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);
 169
 170                 /* Flush the PTE update to concurrent HW */
 171                 tlbinv(ce->vm, addr & -length, length);
 172
 173                 if (wait_for(i915_request_completed(rq), HZ / 2)) {
 174                         pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
 175                                ce->engine->name);
 176                         err = -EINVAL;
 177                 }
 178         } else {
 179                 pr_err("Spinner ended unexpectedly\n");
 180                 err = -EIO;
 181         }
 182         i915_request_put(rq);
 183
 184         cs = page_mask_bits(batch->mm.mapping);
 185         *cs = MI_BATCH_BUFFER_END;
 186         wmb();
 187
 188 out_va:
 189         if (vb != va)
 190                 vb->node = vb_node;
 191         i915_vma_unpin(va);
 192         if (i915_vma_unbind_unlocked(va))
 193                 err = -EIO;
 194 out:
 195         i915_gem_object_put(batch);
 196         return err;
 197 }
 198
 199 static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
 200 {
 201         struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
 202         resource_size_t size = SZ_1G;
 203
 204         /*
 205          * Allocation of largest possible page size allows to test all types
 206          * of pages. To succeed with both allocations, especially in case of Small
 207          * BAR, try to allocate no more than quarter of mappable memory.
 208          */
 209         if (mr && size > resource_size(&mr->io) / 4)
 210                 size = resource_size(&mr->io) / 4;
 211
 212         return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
 213 }
 214
 215 static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
 216 {
 217         /*
 218          * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
 219          * While that does not require the whole 2M block to be contiguous
 220          * it is easier to make it so, since we need that for SZ_2M pagees.
 221          * Since we randomly offset the start of the vma, we need a 4M object
 222          * so that there is a 2M range within it is suitable for SZ_64K PTE.
 223          */
 224         return i915_gem_object_create_internal(gt->i915, SZ_4M);
 225 }
 226
 227 static int
 228 mem_tlbinv(struct intel_gt *gt,
 229            struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
 230            void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
 231 {
 232         unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
 233         struct intel_engine_cs *engine;
 234         struct drm_i915_gem_object *A, *B;
 235         struct i915_ppgtt *ppgtt;
 236         struct i915_vma *va, *vb;
 237         enum intel_engine_id id;
 238         I915_RND_STATE(prng);
 239         void *vaddr;
 240         int err;
 241
 242         /*
 243          * Check that the TLB invalidate is able to revoke an active
 244          * page. We load a page into a spinning COND_BBE loop and then
 245          * remap that page to a new physical address. The old address, and
 246          * so the loop keeps spinning, is retained in the TLB cache until
 247          * we issue an invalidate.
 248          */
 249
 250         A = create_fn(gt);
 251         if (IS_ERR(A))
 252                 return PTR_ERR(A);
 253
 254         vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
 255         if (IS_ERR(vaddr)) {
 256                 err = PTR_ERR(vaddr);
 257                 goto out_a;
 258         }
 259
 260         B = create_fn(gt);
 261         if (IS_ERR(B)) {
 262                 err = PTR_ERR(B);
 263                 goto out_a;
 264         }
 265
 266         vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
 267         if (IS_ERR(vaddr)) {
 268                 err = PTR_ERR(vaddr);
 269                 goto out_b;
 270         }
 271
 272         GEM_BUG_ON(A->base.size != B->base.size);
 273         if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
 274                 pr_warn("Failed to allocate contiguous pages for size %zx\n",
 275                         A->base.size);
 276
 277         ppgtt = i915_ppgtt_create(gt, 0);
 278         if (IS_ERR(ppgtt)) {
 279                 err = PTR_ERR(ppgtt);
 280                 goto out_b;
 281         }
 282
 283         va = i915_vma_instance(A, &ppgtt->vm, NULL);
 284         if (IS_ERR(va)) {
 285                 err = PTR_ERR(va);
 286                 goto out_vm;
 287         }
 288
 289         vb = i915_vma_instance(B, &ppgtt->vm, NULL);
 290         if (IS_ERR(vb)) {
 291                 err = PTR_ERR(vb);
 292                 goto out_vm;
 293         }
 294
 295         err = 0;
 296         for_each_engine(engine, gt, id) {
 297                 struct i915_gem_ww_ctx ww;
 298                 struct intel_context *ce;
 299                 int bit;
 300
 301                 ce = intel_context_create(engine);
 302                 if (IS_ERR(ce)) {
 303                         err = PTR_ERR(ce);
 304                         break;
 305                 }
 306
 307                 i915_vm_put(ce->vm);
 308                 ce->vm = i915_vm_get(&ppgtt->vm);
 309
 310                 for_i915_gem_ww(&ww, err, true)
 311                         err = intel_context_pin_ww(ce, &ww);
 312                 if (err)
 313                         goto err_put;
 314
 315                 for_each_set_bit(bit,
 316                                  (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
 317                                  BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
 318                         unsigned int len;
 319
 320                         if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
 321                                 continue;
 322
 323                         /* sanitycheck the semaphore wake up */
 324                         err = pte_tlbinv(ce, va, va,
 325                                          BIT_ULL(bit),
 326                                          NULL, SZ_4K,
 327                                          &prng);
 328                         if (err)
 329                                 goto err_unpin;
 330
 331                         for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
 332                                 err = pte_tlbinv(ce, va, vb,
 333                                                  BIT_ULL(bit),
 334                                                  tlbinv,
 335                                                  BIT_ULL(len),
 336                                                  &prng);
 337                                 if (err)
 338                                         goto err_unpin;
 339                                 if (len == ppgtt_size)
 340                                         break;
 341                         }
 342                 }
 343 err_unpin:
 344                 intel_context_unpin(ce);
 345 err_put:
 346                 intel_context_put(ce);
 347                 if (err)
 348                         break;
 349         }
 350
 351         if (igt_flush_test(gt->i915))
 352                 err = -EIO;
 353
 354 out_vm:
 355         i915_vm_put(&ppgtt->vm);
 356 out_b:
 357         i915_gem_object_put(B);
 358 out_a:
 359         i915_gem_object_put(A);
 360         return err;
 361 }
 362
 363 static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
 364 {
 365         intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
 366 }
 367
 368 static int invalidate_full(void *arg)
 369 {
 370         struct intel_gt *gt = arg;
 371         int err;
 372
 373         if (GRAPHICS_VER(gt->i915) < 8)
 374                 return 0; /* TLB invalidate not implemented */
 375
 376         err = mem_tlbinv(gt, create_smem, tlbinv_full);
 377         if (err == 0)
 378                 err = mem_tlbinv(gt, create_lmem, tlbinv_full);
 379         if (err == -ENODEV || err == -ENXIO)
 380                 err = 0;
 381
 382         return err;
 383 }
 384
 385 int intel_tlb_live_selftests(struct drm_i915_private *i915)
 386 {
 387         static const struct i915_subtest tests[] = {
 388                 SUBTEST(invalidate_full),
 389         };
 390         struct intel_gt *gt;
 391         unsigned int i;
 392
 393         for_each_gt(gt, i915, i) {
 394                 int err;
 395
 396                 if (intel_gt_is_wedged(gt))
 397                         continue;
 398
 399                 err = intel_gt_live_subtests(tests, gt);
 400                 if (err)
 401                         return err;
 402         }
 403
 404         return 0;
 405 }