// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure, it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE 32
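/*
 * Worked example (illustrative cache line size, not a fixed value): on a GPU
 * whose L2_FEATURES reports a 64-byte cache line, each heap context occupies
 * ALIGN(HEAP_CONTEXT_SIZE, 64) = 64 bytes in the pool's gpu_contexts buffer.
 * The real line size is read via GPU_L2_FEATURES_LINE_SIZE() in
 * panthor_heap_ctx_stride() below.
 */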
/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
        /** @next: Next heap chunk in the list. This is a GPU VA. */
        u64 next;
};
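/*
 * Chunks are linked through @next using a packed encoding: bits 63:12 hold
 * the GPU VA of the next chunk, and bits 11:0 hold the chunk size in 4 KiB
 * units (see panthor_alloc_heap_chunk() and panthor_heap_grow()).
 */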
/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
        /** @node: Used to insert the heap chunk in panthor_heap::chunks. */
        struct list_head node;

        /** @bo: Buffer object backing the heap chunk. */
        struct panthor_kernel_bo *bo;
};
/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
        /** @chunks: List containing all heap chunks allocated so far. */
        struct list_head chunks;

        /** @lock: Lock protecting insertion in the chunks list. */
        struct mutex lock;

        /** @chunk_size: Size of each chunk. */
        u32 chunk_size;

        /** @max_chunks: Maximum number of chunks. */
        u32 max_chunks;

        /**
         * @target_in_flight: Number of in-flight render passes after which
         * we'd let the FW wait for fragment jobs to finish instead of
         * allocating new chunks.
         */
        u32 target_in_flight;

        /** @chunk_count: Number of heap chunks currently allocated. */
        u32 chunk_count;
};
#define MAX_HEAPS_PER_POOL 128
/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
        /** @refcount: Reference count. */
        struct kref refcount;

        /** @ptdev: Device. */
        struct panthor_device *ptdev;

        /** @vm: VM this pool is bound to. */
        struct panthor_vm *vm;

        /** @lock: Lock protecting access to @xa. */
        struct rw_semaphore lock;

        /** @xa: Array storing panthor_heap objects. */
        struct xarray xa;

        /** @gpu_contexts: Buffer object containing the GPU heap contexts. */
        struct panthor_kernel_bo *gpu_contexts;
};
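/*
 * Locking summary (derived from the code below): the pool's rw_semaphore
 * protects @xa and the @vm back-pointer (cleared in
 * panthor_heap_pool_destroy()), while each heap's mutex protects its chunk
 * list. panthor_heap_grow() only takes the pool lock in read mode, so
 * different heaps of the same pool can grow concurrently.
 */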
static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
        u32 l2_features = ptdev->gpu_info.l2_features;
        u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

        return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
        return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
        return pool->gpu_contexts->kmap +
               panthor_get_heap_ctx_offset(pool, id);
}
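/*
 * Heap contexts are handed to the FW by GPU VA. The reverse lookup used by
 * panthor_heap_return_chunk() and panthor_heap_grow() is simply
 * heap_id = (heap_gpu_va - gpu_contexts base VA) / panthor_heap_ctx_stride().
 */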
static void panthor_free_heap_chunk(struct panthor_vm *vm,
                                    struct panthor_heap *heap,
                                    struct panthor_heap_chunk *chunk)
{
        mutex_lock(&heap->lock);
        list_del(&chunk->node);
        heap->chunk_count--;
        mutex_unlock(&heap->lock);

        panthor_kernel_bo_destroy(chunk->bo);
        kfree(chunk);
}
static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
                                    struct panthor_vm *vm,
                                    struct panthor_heap *heap,
                                    bool initial_chunk)
{
        struct panthor_heap_chunk *chunk;
        struct panthor_heap_chunk_header *hdr;
        int ret;

        chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
        if (!chunk)
                return -ENOMEM;

        chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
                                             DRM_PANTHOR_BO_NO_MMAP,
                                             DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
                                             PANTHOR_VM_KERNEL_AUTO_VA);
        if (IS_ERR(chunk->bo)) {
                ret = PTR_ERR(chunk->bo);
                goto err_free_chunk;
        }

        ret = panthor_kernel_bo_vmap(chunk->bo);
        if (ret)
                goto err_destroy_bo;

        hdr = chunk->bo->kmap;
        memset(hdr, 0, sizeof(*hdr));

        if (initial_chunk && !list_empty(&heap->chunks)) {
                struct panthor_heap_chunk *prev_chunk;
                u64 prev_gpuva;

                prev_chunk = list_first_entry(&heap->chunks,
                                              struct panthor_heap_chunk,
                                              node);
                prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
                hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
                            (heap->chunk_size >> 12);
        }

        panthor_kernel_bo_vunmap(chunk->bo);

        mutex_lock(&heap->lock);
        list_add(&chunk->node, &heap->chunks);
        heap->chunk_count++;
        mutex_unlock(&heap->lock);

        return 0;

err_destroy_bo:
        panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
        kfree(chunk);
        return ret;
}
static void panthor_free_heap_chunks(struct panthor_vm *vm,
                                     struct panthor_heap *heap)
{
        struct panthor_heap_chunk *chunk, *tmp;

        list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
                panthor_free_heap_chunk(vm, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
                                     struct panthor_vm *vm,
                                     struct panthor_heap *heap,
                                     u32 chunk_count)
{
        int ret;
        u32 i;

        for (i = 0; i < chunk_count; i++) {
                ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
                if (ret)
                        return ret;
        }

        return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
        struct panthor_heap *heap;

        heap = xa_erase(&pool->xa, handle);
        if (!heap)
                return -EINVAL;

        panthor_free_heap_chunks(pool->vm, heap);
        mutex_destroy(&heap->lock);
        kfree(heap);
        return 0;
}
/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
        int ret;

        down_write(&pool->lock);
        ret = panthor_heap_destroy_locked(pool, handle);
        up_write(&pool->lock);

        return ret;
}
/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a positive handle on success, a negative error code otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
                        u32 initial_chunk_count,
                        u32 chunk_size,
                        u32 max_chunks,
                        u32 target_in_flight,
                        u64 *heap_ctx_gpu_va,
                        u64 *first_chunk_gpu_va)
{
        struct panthor_heap *heap;
        struct panthor_heap_chunk *first_chunk;
        struct panthor_vm *vm;
        int ret = 0;
        u32 id;

        if (initial_chunk_count == 0)
                return -EINVAL;

        if (initial_chunk_count > max_chunks)
                return -EINVAL;

        if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
            chunk_size < SZ_128K || chunk_size > SZ_8M)
                return -EINVAL;

        down_read(&pool->lock);
        vm = panthor_vm_get(pool->vm);
        up_read(&pool->lock);

        /* The pool has been destroyed, we can't create a new heap. */
        if (!vm)
                return -EINVAL;

        heap = kzalloc(sizeof(*heap), GFP_KERNEL);
        if (!heap) {
                ret = -ENOMEM;
                goto err_put_vm;
        }

        mutex_init(&heap->lock);
        INIT_LIST_HEAD(&heap->chunks);
        heap->chunk_size = chunk_size;
        heap->max_chunks = max_chunks;
        heap->target_in_flight = target_in_flight;

        ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
                                        initial_chunk_count);
        if (ret)
                goto err_free_heap;

        first_chunk = list_first_entry(&heap->chunks,
                                       struct panthor_heap_chunk,
                                       node);
        *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

        down_write(&pool->lock);
        /* The pool has been destroyed, we can't create a new heap. */
        if (!pool->vm) {
                ret = -EINVAL;
        } else {
                ret = xa_alloc(&pool->xa, &id, heap,
                               XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
                if (!ret) {
                        void *gpu_ctx = panthor_get_heap_ctx(pool, id);

                        memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
                        *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
                                           panthor_get_heap_ctx_offset(pool, id);
                }
        }
        up_write(&pool->lock);

        if (ret)
                goto err_free_heap;

        panthor_vm_put(vm);
        return id;

err_free_heap:
        panthor_free_heap_chunks(pool->vm, heap);
        mutex_destroy(&heap->lock);
        kfree(heap);

err_put_vm:
        panthor_vm_put(vm);
        return ret;
}
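/*
 * Minimal usage sketch (the chunk size and limits below are illustrative
 * values within the documented constraints, not values mandated by this
 * file):
 *
 *      u64 heap_ctx_gpu_va, first_chunk_gpu_va;
 *      int handle;
 *
 *      handle = panthor_heap_create(pool, 1, SZ_2M, 64, 8,
 *                                   &heap_ctx_gpu_va, &first_chunk_gpu_va);
 *      if (handle < 0)
 *              return handle;
 *      ...
 *      panthor_heap_destroy(pool, handle);
 */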
/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
                              u64 heap_gpu_va,
                              u64 chunk_gpu_va)
{
        u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
        u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
        struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
        struct panthor_heap *heap;
        int ret = 0;

        if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
                return -EINVAL;

        down_read(&pool->lock);
        heap = xa_load(&pool->xa, heap_id);
        if (!heap) {
                ret = -EINVAL;
                goto out_unlock;
        }

        chunk_gpu_va &= GENMASK_ULL(63, 12);

        mutex_lock(&heap->lock);
        list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
                if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
                        removed = chunk;
                        list_del(&chunk->node);
                        heap->chunk_count--;
                        break;
                }
        }
        mutex_unlock(&heap->lock);

        if (removed) {
                panthor_kernel_bo_destroy(chunk->bo);
                kfree(chunk);
        } else {
                ret = -EINVAL;
        }

out_unlock:
        up_read(&pool->lock);
        return ret;
}
/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return:
 * - 0 if a new chunk was allocated
 * - -ENOMEM if the tiler context reached the maximum number of chunks,
 *   if too many render passes are in-flight, or if the allocation failed
 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
                      u64 heap_gpu_va,
                      u32 renderpasses_in_flight,
                      u32 pending_frag_count,
                      u64 *new_chunk_gpu_va)
{
        u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
        u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
        struct panthor_heap_chunk *chunk;
        struct panthor_heap *heap;
        int ret;

        if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
                return -EINVAL;

        down_read(&pool->lock);
        heap = xa_load(&pool->xa, heap_id);
        if (!heap) {
                ret = -EINVAL;
                goto out_unlock;
        }

        /* If we reached the target number of in-flight render passes, or the
         * maximum number of chunks, let the FW figure out another way to find
         * some memory (wait for render passes to finish, or call the exception
         * handler provided by the userspace driver, if any).
         */
        if (renderpasses_in_flight > heap->target_in_flight ||
            heap->chunk_count >= heap->max_chunks) {
                ret = -ENOMEM;
                goto out_unlock;
        }

        /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
         * which goes through the blocking allocation path. Ultimately, we
         * want a non-blocking allocation, so we can immediately report to the
         * FW when the system is running out of memory. In that case, the FW
         * can call a user-provided exception handler, which might try to free
         * some tiler memory by issuing an intermediate fragment job. If the
         * exception handler can't do anything, it will flag the queue as
         * faulty so the job that triggered this tiler chunk allocation and all
         * further jobs in this queue fail immediately instead of having to
         * wait for the job timeout.
         */
        ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
        if (ret)
                goto out_unlock;

        chunk = list_first_entry(&heap->chunks,
                                 struct panthor_heap_chunk,
                                 node);
        *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
                            (heap->chunk_size >> 12);
        ret = 0;

out_unlock:
        up_read(&pool->lock);
        return ret;
}
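/*
 * The value returned in *new_chunk_gpu_va uses the same packed encoding as
 * panthor_heap_chunk_header::next. For example (illustrative VA), a 2 MiB
 * chunk mapped at 0x800000000000 is reported as
 * 0x800000000000 | (SZ_2M >> 12) = 0x800000000200.
 */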
static void panthor_heap_pool_release(struct kref *refcount)
{
        struct panthor_heap_pool *pool =
                container_of(refcount, struct panthor_heap_pool, refcount);

        xa_destroy(&pool->xa);
        kfree(pool);
}
/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
        if (pool)
                kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
        if (pool)
                kref_get(&pool->refcount);

        return pool;
}
/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: The panthor device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, a negative error code otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
        size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
                              panthor_heap_ctx_stride(ptdev),
                              4096);
        struct panthor_heap_pool *pool;
        int ret = 0;

        pool = kzalloc(sizeof(*pool), GFP_KERNEL);
        if (!pool)
                return ERR_PTR(-ENOMEM);

        /* We want a weak ref here: the heap pool belongs to the VM, so we're
         * sure that, as long as the heap pool exists, the VM exists too.
         */
        pool->vm = vm;
        pool->ptdev = ptdev;
        init_rwsem(&pool->lock);
        xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
        kref_init(&pool->refcount);

        pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
                                                      DRM_PANTHOR_BO_NO_MMAP,
                                                      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
                                                      PANTHOR_VM_KERNEL_AUTO_VA);
        if (IS_ERR(pool->gpu_contexts)) {
                ret = PTR_ERR(pool->gpu_contexts);
                goto err_destroy_pool;
        }

        ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
        if (ret)
                goto err_destroy_pool;

        return pool;

err_destroy_pool:
        panthor_heap_pool_destroy(pool);
        return ERR_PTR(ret);
}
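/*
 * Creation sketch (the caller owning @vm, e.g. a panthor_file, is assumed
 * and not shown here):
 *
 *      struct panthor_heap_pool *pool;
 *
 *      pool = panthor_heap_pool_create(ptdev, vm);
 *      if (IS_ERR(pool))
 *              return PTR_ERR(pool);
 */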
/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, thus
 * preventing any use of the heap contexts or the chunks attached to them
 * after that point.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * invalid heap contexts as faulty.
 *
 * The heap pool object is only released once all references to the pool
 * have been dropped.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
        struct panthor_heap *heap;
        unsigned long i;

        if (!pool)
                return;

        down_write(&pool->lock);
        xa_for_each(&pool->xa, i, heap)
                drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

        if (!IS_ERR_OR_NULL(pool->gpu_contexts))
                panthor_kernel_bo_destroy(pool->gpu_contexts);

        /* Reflects the fact the pool has been destroyed. */
        pool->vm = NULL;
        up_write(&pool->lock);

        panthor_heap_pool_put(pool);
}
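/*
 * Note: the panthor_heap_pool_put() above drops a single reference. Callers
 * that took additional references with panthor_heap_pool_get() must still
 * balance them with panthor_heap_pool_put() before the pool is actually
 * freed by panthor_heap_pool_release().
 */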