// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure, it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE 32
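/*
 * Worked example (illustrative cache line size, not a fixed value): on a GPU
 * whose L2_FEATURES reports a 64-byte cache line, each heap context occupies
 * ALIGN(HEAP_CONTEXT_SIZE, 64) = 64 bytes in the pool's gpu_contexts buffer.
 * The real line size is read via GPU_L2_FEATURES_LINE_SIZE() in
 * panthor_heap_ctx_stride() below.
 */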
/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
        /** @next: Next heap chunk in the list. This is a GPU VA. */
        u64 next;
};
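/*
 * Chunks are linked through @next using a packed encoding: bits 63:12 hold
 * the GPU VA of the next chunk, and bits 11:0 hold the chunk size in 4 KiB
 * units (see panthor_alloc_heap_chunk() and panthor_heap_grow()).
 */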
/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
        /** @node: Used to insert the heap chunk in panthor_heap::chunks. */
        struct list_head node;

        /** @bo: Buffer object backing the heap chunk. */
        struct panthor_kernel_bo *bo;
};
/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
        /** @chunks: List containing all heap chunks allocated so far. */
        struct list_head chunks;

        /** @lock: Lock protecting insertion in the chunks list. */
        struct mutex lock;

        /** @chunk_size: Size of each chunk. */
        u32 chunk_size;

        /** @max_chunks: Maximum number of chunks. */
        u32 max_chunks;

        /**
         * @target_in_flight: Number of in-flight render passes after which
         * we'd let the FW wait for fragment jobs to finish instead of
         * allocating new chunks.
         */
        u32 target_in_flight;

        /** @chunk_count: Number of heap chunks currently allocated. */
        u32 chunk_count;
};
#define MAX_HEAPS_PER_POOL 128
/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
        /** @refcount: Reference count. */
        struct kref refcount;

        /** @ptdev: Device. */
        struct panthor_device *ptdev;

        /** @vm: VM this pool is bound to. */
        struct panthor_vm *vm;

        /** @lock: Lock protecting access to @xa. */
        struct rw_semaphore lock;

        /** @xa: Array storing panthor_heap objects. */
        struct xarray xa;

        /** @gpu_contexts: Buffer object containing the GPU heap contexts. */
        struct panthor_kernel_bo *gpu_contexts;
};
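/*
 * Locking summary (derived from the code below): the pool's rw_semaphore
 * protects @xa and the @vm back-pointer (cleared in
 * panthor_heap_pool_destroy()), while each heap's mutex protects its chunk
 * list. panthor_heap_grow() only takes the pool lock in read mode, so
 * different heaps of the same pool can grow concurrently.
 */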
static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
        u32 l2_features = ptdev->gpu_info.l2_features;
        u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

        return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
        return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
        return pool->gpu_contexts->kmap +
               panthor_get_heap_ctx_offset(pool, id);
}
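/*
 * Heap contexts are handed to the FW by GPU VA. The reverse lookup used by
 * panthor_heap_return_chunk() and panthor_heap_grow() is simply
 * heap_id = (heap_gpu_va - gpu_contexts base VA) / panthor_heap_ctx_stride().
 */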
static void panthor_free_heap_chunk(struct panthor_vm *vm,
                                    struct panthor_heap *heap,
                                    struct panthor_heap_chunk *chunk)
{
        mutex_lock(&heap->lock);
        list_del(&chunk->node);
        heap->chunk_count--;
        mutex_unlock(&heap->lock);

        panthor_kernel_bo_destroy(chunk->bo);
        kfree(chunk);
}
static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
                                    struct panthor_vm *vm,
                                    struct panthor_heap *heap,
                                    bool initial_chunk)
{
        struct panthor_heap_chunk *chunk;
        struct panthor_heap_chunk_header *hdr;
        int ret;

        chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
        if (!chunk)
                return -ENOMEM;

        chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
                                             DRM_PANTHOR_BO_NO_MMAP,
                                             DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
                                             PANTHOR_VM_KERNEL_AUTO_VA);
        if (IS_ERR(chunk->bo)) {
                ret = PTR_ERR(chunk->bo);
                goto err_free_chunk;
        }

        ret = panthor_kernel_bo_vmap(chunk->bo);
        if (ret)
                goto err_destroy_bo;

        hdr = chunk->bo->kmap;
        memset(hdr, 0, sizeof(*hdr));

        if (initial_chunk && !list_empty(&heap->chunks)) {
                struct panthor_heap_chunk *prev_chunk;
                u64 prev_gpuva;

                prev_chunk = list_first_entry(&heap->chunks,
                                              struct panthor_heap_chunk,
                                              node);
                prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
                hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
                            (heap->chunk_size >> 12);
        }

        panthor_kernel_bo_vunmap(chunk->bo);

        mutex_lock(&heap->lock);
        list_add(&chunk->node, &heap->chunks);
        heap->chunk_count++;
        mutex_unlock(&heap->lock);

        return 0;

err_destroy_bo:
        panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
        kfree(chunk);
        return ret;
}
static void panthor_free_heap_chunks(struct panthor_vm *vm,
                                     struct panthor_heap *heap)
{
        struct panthor_heap_chunk *chunk, *tmp;

        list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
                panthor_free_heap_chunk(vm, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
                                     struct panthor_vm *vm,
                                     struct panthor_heap *heap,
                                     u32 chunk_count)
{
        int ret;
        u32 i;

        for (i = 0; i < chunk_count; i++) {
                ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
                if (ret)
                        return ret;
        }

        return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
        struct panthor_heap *heap;

        heap = xa_erase(&pool->xa, handle);
        if (!heap)
                return -EINVAL;

        panthor_free_heap_chunks(pool->vm, heap);
        mutex_destroy(&heap->lock);
        kfree(heap);
        return 0;
}
/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
        int ret;

        down_write(&pool->lock);
        ret = panthor_heap_destroy_locked(pool, handle);
        up_write(&pool->lock);

        return ret;
}
/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a positive handle on success, a negative error code otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
                        u32 initial_chunk_count,
                        u32 chunk_size,
                        u32 max_chunks,
                        u32 target_in_flight,
                        u64 *heap_ctx_gpu_va,
                        u64 *first_chunk_gpu_va)
{
        struct panthor_heap *heap;
        struct panthor_heap_chunk *first_chunk;
        struct panthor_vm *vm;
        int ret = 0;
        u32 id;

        if (initial_chunk_count == 0)
                return -EINVAL;

        if (initial_chunk_count > max_chunks)
                return -EINVAL;

        if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
            chunk_size < SZ_128K || chunk_size > SZ_8M)
                return -EINVAL;

        down_read(&pool->lock);
        vm = panthor_vm_get(pool->vm);
        up_read(&pool->lock);

        /* The pool has been destroyed, we can't create a new heap. */
        if (!vm)
                return -EINVAL;

        heap = kzalloc(sizeof(*heap), GFP_KERNEL);
        if (!heap) {
                ret = -ENOMEM;
                goto err_put_vm;
        }

        mutex_init(&heap->lock);
        INIT_LIST_HEAD(&heap->chunks);
        heap->chunk_size = chunk_size;
        heap->max_chunks = max_chunks;
        heap->target_in_flight = target_in_flight;

        ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
                                        initial_chunk_count);
        if (ret)
                goto err_free_heap;

        first_chunk = list_first_entry(&heap->chunks,
                                       struct panthor_heap_chunk,
                                       node);
        *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

        down_write(&pool->lock);
        /* The pool has been destroyed, we can't create a new heap. */
        if (!pool->vm) {
                ret = -EINVAL;
        } else {
                ret = xa_alloc(&pool->xa, &id, heap,
                               XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
                if (!ret) {
                        void *gpu_ctx = panthor_get_heap_ctx(pool, id);

                        memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
                        *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
                                           panthor_get_heap_ctx_offset(pool, id);
                }
        }
        up_write(&pool->lock);

        if (ret)
                goto err_free_heap;

        panthor_vm_put(vm);
        return id;

err_free_heap:
        panthor_free_heap_chunks(pool->vm, heap);
        mutex_destroy(&heap->lock);
        kfree(heap);

err_put_vm:
        panthor_vm_put(vm);
        return ret;
}
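/*
 * Minimal usage sketch (the chunk size and limits below are illustrative
 * values within the documented constraints, not values mandated by this
 * file):
 *
 *      u64 heap_ctx_gpu_va, first_chunk_gpu_va;
 *      int handle;
 *
 *      handle = panthor_heap_create(pool, 1, SZ_2M, 64, 8,
 *                                   &heap_ctx_gpu_va, &first_chunk_gpu_va);
 *      if (handle < 0)
 *              return handle;
 *      ...
 *      panthor_heap_destroy(pool, handle);
 */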
/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
                              u64 heap_gpu_va,
                              u64 chunk_gpu_va)
{
        u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
        u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
        struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
        struct panthor_heap *heap;
        int ret = 0;

        if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
                return -EINVAL;

        down_read(&pool->lock);
        heap = xa_load(&pool->xa, heap_id);
        if (!heap) {
                ret = -EINVAL;
                goto out_unlock;
        }

        chunk_gpu_va &= GENMASK_ULL(63, 12);

        mutex_lock(&heap->lock);
        list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
                if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
                        removed = chunk;
                        list_del(&chunk->node);
                        heap->chunk_count--;
                        break;
                }
        }
        mutex_unlock(&heap->lock);

        if (removed) {
                panthor_kernel_bo_destroy(chunk->bo);
                kfree(chunk);
        } else {
                ret = -EINVAL;
        }

out_unlock:
        up_read(&pool->lock);
        return ret;
}
/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return:
 * - 0 if a new chunk was allocated
 * - -ENOMEM if the tiler context reached the maximum number of chunks,
 *   if too many render passes are in-flight, or if the allocation failed
 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
                      u64 heap_gpu_va,
                      u32 renderpasses_in_flight,
                      u32 pending_frag_count,
                      u64 *new_chunk_gpu_va)
{
        u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
        u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
        struct panthor_heap_chunk *chunk;
        struct panthor_heap *heap;
        int ret;

        if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
                return -EINVAL;

        down_read(&pool->lock);
        heap = xa_load(&pool->xa, heap_id);
        if (!heap) {
                ret = -EINVAL;
                goto out_unlock;
        }

        /* If we reached the target number of in-flight render passes, or the
         * maximum number of chunks, let the FW figure out another way to find
         * some memory (wait for render passes to finish, or call the exception
         * handler provided by the userspace driver, if any).
         */
        if (renderpasses_in_flight > heap->target_in_flight ||
            heap->chunk_count >= heap->max_chunks) {
                ret = -ENOMEM;
                goto out_unlock;
        }

        /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
         * which goes through the blocking allocation path. Ultimately, we
         * want a non-blocking allocation, so we can immediately report to the
         * FW when the system is running out of memory. In that case, the FW
         * can call a user-provided exception handler, which might try to free
         * some tiler memory by issuing an intermediate fragment job. If the
         * exception handler can't do anything, it will flag the queue as
         * faulty so the job that triggered this tiler chunk allocation and all
         * further jobs in this queue fail immediately instead of having to
         * wait for the job timeout.
         */
        ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
        if (ret)
                goto out_unlock;

        chunk = list_first_entry(&heap->chunks,
                                 struct panthor_heap_chunk,
                                 node);
        *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
                            (heap->chunk_size >> 12);
        ret = 0;

out_unlock:
        up_read(&pool->lock);
        return ret;
}
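/*
 * The value returned in *new_chunk_gpu_va uses the same packed encoding as
 * panthor_heap_chunk_header::next. For example (illustrative VA), a 2 MiB
 * chunk mapped at 0x800000000000 is reported as
 * 0x800000000000 | (SZ_2M >> 12) = 0x800000000200.
 */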
static void panthor_heap_pool_release(struct kref *refcount)
{
        struct panthor_heap_pool *pool =
                container_of(refcount, struct panthor_heap_pool, refcount);

        xa_destroy(&pool->xa);
        kfree(pool);
}
/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
        if (pool)
                kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
        if (pool)
                kref_get(&pool->refcount);

        return pool;
}
/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: The panthor device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, a negative error code otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
        size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
                              panthor_heap_ctx_stride(ptdev),
                              4096);
        struct panthor_heap_pool *pool;
        int ret = 0;

        pool = kzalloc(sizeof(*pool), GFP_KERNEL);
        if (!pool)
                return ERR_PTR(-ENOMEM);

        /* We want a weak ref here: the heap pool belongs to the VM, so we're
         * sure that, as long as the heap pool exists, the VM exists too.
         */
        pool->vm = vm;
        pool->ptdev = ptdev;
        init_rwsem(&pool->lock);
        xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
        kref_init(&pool->refcount);

        pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
                                                      DRM_PANTHOR_BO_NO_MMAP,
                                                      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
                                                      PANTHOR_VM_KERNEL_AUTO_VA);
        if (IS_ERR(pool->gpu_contexts)) {
                ret = PTR_ERR(pool->gpu_contexts);
                goto err_destroy_pool;
        }

        ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
        if (ret)
                goto err_destroy_pool;

        return pool;

err_destroy_pool:
        panthor_heap_pool_destroy(pool);
        return ERR_PTR(ret);
}
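/*
 * Creation sketch (the caller owning @vm, e.g. a panthor_file, is assumed
 * and not shown here):
 *
 *      struct panthor_heap_pool *pool;
 *
 *      pool = panthor_heap_pool_create(ptdev, vm);
 *      if (IS_ERR(pool))
 *              return PTR_ERR(pool);
 */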
/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, thus
 * preventing any use of the heap contexts or the chunks attached to them
 * after that point.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * invalid heap contexts as faulty.
 *
 * The heap pool object is only released once all references to the pool
 * have been dropped.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
        struct panthor_heap *heap;
        unsigned long i;

        if (!pool)
                return;

        down_write(&pool->lock);
        xa_for_each(&pool->xa, i, heap)
                drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

        if (!IS_ERR_OR_NULL(pool->gpu_contexts))
                panthor_kernel_bo_destroy(pool->gpu_contexts);

        /* Reflects the fact the pool has been destroyed. */
        pool->vm = NULL;
        up_write(&pool->lock);

        panthor_heap_pool_put(pool);
}
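/*
 * Note: the panthor_heap_pool_put() above drops a single reference. Callers
 * that took additional references with panthor_heap_pool_get() must still
 * balance them with panthor_heap_pool_put() before the pool is actually
 * freed by panthor_heap_pool_release().
 */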