1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
4 #ifdef CONFIG_ARM_ARCH_TIMER
5 #include <asm/arch_timer.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/firmware.h>
11 #include <linux/iopoll.h>
12 #include <linux/iosys-map.h>
13 #include <linux/mutex.h>
14 #include <linux/platform_device.h>
16 #include <drm/drm_drv.h>
17 #include <drm/drm_managed.h>
19 #include "panthor_device.h"
20 #include "panthor_fw.h"
21 #include "panthor_gem.h"
22 #include "panthor_gpu.h"
23 #include "panthor_mmu.h"
24 #include "panthor_regs.h"
25 #include "panthor_sched.h"
27 #define CSF_FW_NAME "mali_csffw.bin"
29 #define PING_INTERVAL_MS 12000
30 #define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024)
31 #define PROGRESS_TIMEOUT_SCALE_SHIFT 10
32 #define IDLE_HYSTERESIS_US 800
33 #define PWROFF_HYSTERESIS_US 10000
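/*
 * The value programmed into the FW progress_timer field is expressed in
 * units of 2^PROGRESS_TIMEOUT_SCALE_SHIFT (1024) GPU cycles, which is why
 * panthor_fw_init_global_iface() shifts PROGRESS_TIMEOUT_CYCLES right by
 * PROGRESS_TIMEOUT_SCALE_SHIFT before handing it to the FW.
 */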
36 * struct panthor_fw_binary_hdr - Firmware binary header.
38 struct panthor_fw_binary_hdr {
39 /** @magic: Magic value to check binary validity. */
41 #define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e
43 /** @minor: Minor FW version. */
46 /** @major: Major FW version. */
48 #define CSF_FW_BINARY_HEADER_MAJOR_MAX 0
50 /** @padding1: MBZ. */
53 /** @version_hash: FW version hash. */
56 /** @padding2: MBZ. */
59 /** @size: FW binary size. */
64 * enum panthor_fw_binary_entry_type - Firmware binary entry type
66 enum panthor_fw_binary_entry_type {
67 /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
68 CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,
70 /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
71 CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,
73 /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
74 CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,
76 /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
77 CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,
79 /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
80 CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
83 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
84 * the FW binary was built.
86 CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
89 #define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff)
90 #define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff)
91 #define CSF_FW_BINARY_ENTRY_UPDATE BIT(30)
92 #define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31)
94 #define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0)
95 #define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1)
96 #define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2)
97 #define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3)
98 #define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3)
99 #define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3)
100 #define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3)
101 #define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3)
102 #define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5)
103 #define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30)
104 #define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31)
106 #define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \
107 (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \
108 CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \
109 CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \
110 CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \
111 CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \
112 CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \
113 CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
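/*
 * IFACE entries carrying flags outside this mask are rejected by
 * panthor_fw_load_section_entry().
 */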
116 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
118 struct panthor_fw_binary_section_entry_hdr {
119 /** @flags: Section flags. */
122 /** @va: MCU virtual range to map this binary section to. */
124 /** @start: Start address. */
127 /** @end: End address. */
131 /** @data: Data to initialize the FW section with. */
133 /** @start: Start offset in the FW binary. */
136 /** @end: End offset in the FW binary. */
141 struct panthor_fw_build_info_hdr {
142 /** @meta_start: Offset of the build info data in the FW binary */
144 /** @meta_size: Size of the build info data in the FW binary */
149 * struct panthor_fw_binary_iter - Firmware binary iterator
151 * Used to parse a firmware binary.
153 struct panthor_fw_binary_iter {
154 /** @data: FW binary data. */
157 /** @size: FW binary size. */
160 /** @offset: Iterator offset. */
165 * struct panthor_fw_section - FW section
167 struct panthor_fw_section {
168 /** @node: Used to keep track of FW sections. */
169 struct list_head node;
171 /** @flags: Section flags, as encoded in the FW binary. */
174 /** @mem: Section memory. */
175 struct panthor_kernel_bo *mem;
178 * @name: Name of the section, as specified in the binary.
185 * @data: Initial data copied to the FW memory.
187 * We keep data around so we can reload sections after a reset.
190 /** @buf: Buffer used to store init data. */
193 /** @size: Size of @buf in bytes. */
198 #define CSF_MCU_SHARED_REGION_START 0x04000000ULL
199 #define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL
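/*
 * The shared region is a fixed 64MB window starting at 0x04000000 in the MCU
 * address space. The FW section mapped there carries the global/CSG/CS
 * control interfaces parsed by panthor_fw_init_ifaces().
 */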
201 #define MIN_CS_PER_CSG 8
203 #define MAX_CSG_PRIO 0xf
205 #define CSF_IFACE_VERSION(major, minor, patch) \
206 (((major) << 24) | ((minor) << 16) | (patch))
207 #define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24)
208 #define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff)
209 #define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff)
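/*
 * Example: CSF_IFACE_VERSION(1, 1, 0) is 0x01010000, so interface versions
 * can be compared numerically, as panthor_get_instr_features() does.
 */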
211 #define CSF_GROUP_CONTROL_OFFSET 0x1000
212 #define CSF_STREAM_CONTROL_OFFSET 0x40
213 #define CSF_UNPRESERVED_REG_COUNT 4
216 * struct panthor_fw_iface - FW interfaces
218 struct panthor_fw_iface {
219 /** @global: Global interface. */
220 struct panthor_fw_global_iface global;
222 /** @groups: Group slot interfaces. */
223 struct panthor_fw_csg_iface groups[MAX_CSGS];
225 /** @streams: Command stream slot interfaces. */
226 struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
230 * struct panthor_fw - Firmware management
234 struct panthor_vm *vm;
236 /** @sections: List of FW sections. */
237 struct list_head sections;
239 /** @shared_section: The section containing the FW interfaces. */
240 struct panthor_fw_section *shared_section;
242 /** @iface: FW interfaces. */
243 struct panthor_fw_iface iface;
245 /** @watchdog: Collection of fields relating to the FW watchdog. */
247 /** @ping_work: Delayed work used to ping the FW. */
248 struct delayed_work ping_work;
252 * @req_waitqueue: FW request waitqueue.
254 * Every time a request is sent to a command stream group or the global
255 * interface, the caller will first busy wait for the request to be
256 * acknowledged, and then fall back to a sleeping wait.
258 * This wait queue is here to support the sleeping wait flavor.
260 wait_queue_head_t req_waitqueue;
262 /** @booted: True if the FW is booted. */
266 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
268 * A fast reset is just a reset where the driver doesn't reload the FW sections.
270 * Any time the firmware is properly suspended, a fast reset can take place.
271 * On the other hand, if the halt operation failed, the driver will reload
272 * all sections to make sure we start from a fresh state.
276 /** @irq: Job irq data. */
277 struct panthor_irq irq;
280 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
282 return ptdev->fw->vm;
286 * panthor_fw_get_glb_iface() - Get the global interface
289 * Return: The global interface.
291 struct panthor_fw_global_iface *
292 panthor_fw_get_glb_iface(struct panthor_device *ptdev)
294 return &ptdev->fw->iface.global;
298 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
300 * @csg_slot: Index of the command stream group slot.
302 * Return: The command stream group slot interface.
304 struct panthor_fw_csg_iface *
305 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
307 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
310 return &ptdev->fw->iface.groups[csg_slot];
314 * panthor_fw_get_cs_iface() - Get a command stream slot interface
316 * @csg_slot: Index of the command stream group slot.
317 * @cs_slot: Index of the command stream slot.
319 * Return: The command stream slot interface.
321 struct panthor_fw_cs_iface *
322 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
324 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
327 return &ptdev->fw->iface.streams[csg_slot][cs_slot];
331 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
333 * @timeout_us: Timeout expressed in micro-seconds.
335 * The FW has two timer sources: the GPU counter or arch-timer. We need
336 * to express timeouts in terms of a number of cycles and specify which
337 * timer source should be used.
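 *
 * For example, with a 24MHz arch timer (and assuming the elided divisor
 * below converts usec * Hz into cycles), PWROFF_HYSTERESIS_US (10000us)
 * would encode as GLB_TIMER_VAL(240000) with the arch timer kept as the
 * source.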
339 * Return: A value suitable for timeout fields in the global interface.
341 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
343 bool use_cycle_counter = false;
347 #ifdef CONFIG_ARM_ARCH_TIMER
348 timer_rate = arch_timer_get_cntfrq();
352 use_cycle_counter = true;
353 timer_rate = clk_get_rate(ptdev->clks.core);
356 if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
357 /* We couldn't get a valid clock rate, let's just pick the
358 * maximum value so the FW still handles the core
359 * power on/off requests.
361 return GLB_TIMER_VAL(~0) |
362 GLB_TIMER_SOURCE_GPU_COUNTER;
365 mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
367 if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
368 mod_cycles = GLB_TIMER_VAL(~0);
370 return GLB_TIMER_VAL(mod_cycles) |
371 (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
374 static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
375 struct panthor_fw_binary_iter *iter,
376 void *out, size_t size)
378 size_t new_offset = iter->offset + size;
380 if (new_offset > iter->size || new_offset < iter->offset) {
381 drm_err(&ptdev->base, "Firmware too small\n");
385 memcpy(out, iter->data + iter->offset, size);
386 iter->offset = new_offset;
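/*
 * Each binary entry is parsed through a sub-iterator bounded to the entry
 * size, so a malformed entry size can never make the parsers read past the
 * entry payload (or past the firmware image).
 */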
390 static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
391 struct panthor_fw_binary_iter *iter,
392 struct panthor_fw_binary_iter *sub_iter,
395 size_t new_offset = iter->offset + size;
397 if (new_offset > iter->size || new_offset < iter->offset) {
398 drm_err(&ptdev->base, "Firmware entry too long\n");
402 sub_iter->offset = 0;
403 sub_iter->data = iter->data + iter->offset;
404 sub_iter->size = size;
405 iter->offset = new_offset;
409 static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
410 struct panthor_fw_section *section)
412 bool was_mapped = !!section->mem->kmap;
415 if (!section->data.size &&
416 !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
419 ret = panthor_kernel_bo_vmap(section->mem);
420 if (drm_WARN_ON(&ptdev->base, ret))
423 memcpy(section->mem->kmap, section->data.buf, section->data.size);
424 if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
425 memset(section->mem->kmap + section->data.size, 0,
426 panthor_kernel_bo_size(section->mem) - section->data.size);
430 panthor_kernel_bo_vunmap(section->mem);
434 * panthor_fw_alloc_queue_iface_mem() - Allocate ring-buffer interfaces.
436 * @input: Pointer holding the input interface on success.
437 * Should be ignored on failure.
438 * @output: Pointer holding the output interface on success.
439 * Should be ignored on failure.
440 * @input_fw_va: Pointer holding the input interface FW VA on success.
441 * Should be ignored on failure.
442 * @output_fw_va: Pointer holding the output interface FW VA on success.
443 * Should be ignored on failure.
445 * Allocates panthor_fw_ringbuf_{input,output}_iface interfaces. The input
446 * interface is at offset 0, and the output interface at offset 4096.
448 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
450 struct panthor_kernel_bo *
451 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
452 struct panthor_fw_ringbuf_input_iface **input,
453 const struct panthor_fw_ringbuf_output_iface **output,
454 u32 *input_fw_va, u32 *output_fw_va)
456 struct panthor_kernel_bo *mem;
459 mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
460 DRM_PANTHOR_BO_NO_MMAP,
461 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
462 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
463 PANTHOR_VM_KERNEL_AUTO_VA);
467 ret = panthor_kernel_bo_vmap(mem);
469 panthor_kernel_bo_destroy(mem);
473 memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
475 *output = mem->kmap + SZ_4K;
476 *input_fw_va = panthor_kernel_bo_gpuva(mem);
477 *output_fw_va = *input_fw_va + SZ_4K;
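/* Note: the buffer is mapped uncached on the MCU side
 * (DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED above), presumably so ring-buffer
 * pointer updates are visible to the FW without extra cache maintenance.
 */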
483 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
485 * @size: Size of the suspend buffer.
487 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
489 struct panthor_kernel_bo *
490 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
492 return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
493 DRM_PANTHOR_BO_NO_MMAP,
494 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
495 PANTHOR_VM_KERNEL_AUTO_VA);
498 static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
499 const struct firmware *fw,
500 struct panthor_fw_binary_iter *iter,
503 ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
504 struct panthor_fw_binary_section_entry_hdr hdr;
505 struct panthor_fw_section *section;
510 ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
514 if (hdr.data.end < hdr.data.start) {
515 drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
516 hdr.data.end, hdr.data.start);
520 if (hdr.va.end < hdr.va.start) {
521 drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
522 hdr.va.end, hdr.va.start);
526 if (hdr.data.end > fw->size) {
527 drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
528 hdr.data.end, fw->size);
532 if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
533 drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
534 hdr.va.start, hdr.va.end);
538 if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
539 drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
544 if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
545 drm_warn(&ptdev->base,
546 "Firmware protected mode entry not be supported, ignoring");
550 if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
551 !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
552 drm_err(&ptdev->base,
553 "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
557 name_len = iter->size - iter->offset;
559 section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
563 list_add_tail(§ion->node, &ptdev->fw->sections);
564 section->flags = hdr.flags;
565 section->data.size = hdr.data.end - hdr.data.start;
567 if (section->data.size > 0) {
568 void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
573 memcpy(data, fw->data + hdr.data.start, section->data.size);
574 section->data.buf = data;
578 char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
583 memcpy(name, iter->data + iter->offset, name_len);
584 name[name_len] = '\0';
585 section->name = name;
588 section_size = hdr.va.end - hdr.va.start;
590 u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
591 struct panthor_gem_object *bo;
592 u32 vm_map_flags = 0;
593 struct sg_table *sgt;
594 u64 va = hdr.va.start;
596 if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
597 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
599 if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
600 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
602 /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
603 * non-cacheable for now. We might want to introduce a new
604 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
605 * memory and is currently not used by our driver) for
606 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
607 * of IO-coherent systems.
609 if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
610 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
612 section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
614 DRM_PANTHOR_BO_NO_MMAP,
616 if (IS_ERR(section->mem))
617 return PTR_ERR(section->mem);
619 if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
622 if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
623 ret = panthor_kernel_bo_vmap(section->mem);
628 panthor_fw_init_section_mem(ptdev, section);
630 bo = to_panthor_bo(section->mem->obj);
631 sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
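/* The section content was just written through the CPU mapping; sync the
 * pages for device so the MCU observes up-to-date data once mapped.
 */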
635 dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
638 if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
639 ptdev->fw->shared_section = section;
644 static int panthor_fw_read_build_info(struct panthor_device *ptdev,
645 const struct firmware *fw,
646 struct panthor_fw_binary_iter *iter,
649 struct panthor_fw_build_info_hdr hdr;
651 const char git_sha_header[sizeof(header)] = "git_sha: ";
654 ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
658 if (hdr.meta_start > fw->size ||
659 hdr.meta_start + hdr.meta_size > fw->size) {
660 drm_err(&ptdev->base, "Firmware build info corrupt\n");
661 /* We don't need the build info, so continue */
665 if (memcmp(git_sha_header, fw->data + hdr.meta_start,
666 sizeof(git_sha_header))) {
667 /* Not the expected header, this isn't metadata we understand */
671 /* Check that the git SHA is NULL terminated as expected */
672 if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
673 drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
674 /* Don't treat as fatal */
678 drm_info(&ptdev->base, "Firmware git sha: %s\n",
679 fw->data + hdr.meta_start + sizeof(git_sha_header));
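/*
 * On a fast reset only the writable sections are re-initialized: read-only
 * sections cannot have been modified by the FW. panthor_fw_post_reset()
 * asks for a full reload when the FW state cannot be trusted anymore.
 */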
685 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
687 struct panthor_fw_section *section;
689 list_for_each_entry(section, &ptdev->fw->sections, node) {
690 struct sg_table *sgt;
692 if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
695 panthor_fw_init_section_mem(ptdev, section);
696 sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
697 if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
698 dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
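/*
 * Entry types we don't handle are only tolerated when the FW flags them as
 * optional (CSF_FW_BINARY_ENTRY_OPTIONAL); an unknown mandatory entry makes
 * the firmware load fail.
 */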
702 static int panthor_fw_load_entry(struct panthor_device *ptdev,
703 const struct firmware *fw,
704 struct panthor_fw_binary_iter *iter)
706 struct panthor_fw_binary_iter eiter;
710 ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
714 if ((iter->offset % sizeof(u32)) ||
715 (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
716 drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
717 (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
721 if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
722 CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
725 switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
726 case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
727 return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
728 case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
729 return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);
731 /* FIXME: handle those entry types? */
732 case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
733 case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
734 case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
735 case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
741 if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
744 drm_err(&ptdev->base,
745 "Unsupported non-optional entry type %u in firmware\n",
746 CSF_FW_BINARY_ENTRY_TYPE(ehdr));
750 static int panthor_fw_load(struct panthor_device *ptdev)
752 const struct firmware *fw = NULL;
753 struct panthor_fw_binary_iter iter = {};
754 struct panthor_fw_binary_hdr hdr;
758 snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
759 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
760 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
763 ret = request_firmware(&fw, fw_path, ptdev->base.dev);
765 drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
770 iter.data = fw->data;
771 iter.size = fw->size;
772 ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
776 if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
778 drm_err(&ptdev->base, "Invalid firmware magic\n");
782 if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
784 drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
785 hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
789 if (hdr.size > iter.size) {
790 drm_err(&ptdev->base, "Firmware image is truncated\n");
794 iter.size = hdr.size;
796 while (iter.offset < hdr.size) {
797 ret = panthor_fw_load_entry(ptdev, fw, &iter);
802 if (!ptdev->fw->shared_section) {
803 drm_err(&ptdev->base, "Shared interface region not found\n");
809 release_firmware(fw);
814 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
816 * @mcu_va: MCU address.
818 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
820 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
822 u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
823 u64 shared_mem_end = shared_mem_start +
824 panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
825 if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
828 return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
831 static int panthor_init_cs_iface(struct panthor_device *ptdev,
832 unsigned int csg_idx, unsigned int cs_idx)
834 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
835 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
836 struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
837 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
838 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
839 (csg_idx * glb_iface->control->group_stride) +
840 CSF_STREAM_CONTROL_OFFSET +
841 (cs_idx * csg_iface->control->stream_stride);
842 struct panthor_fw_cs_iface *first_cs_iface =
843 panthor_fw_get_cs_iface(ptdev, 0, 0);
845 if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
848 spin_lock_init(&cs_iface->lock);
849 cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
850 cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
851 cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
853 if (!cs_iface->input || !cs_iface->output) {
854 drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
858 if (cs_iface != first_cs_iface) {
859 if (cs_iface->control->features != first_cs_iface->control->features) {
860 drm_err(&ptdev->base, "Expecting identical CS slots");
864 u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
866 ptdev->csif_info.cs_reg_count = reg_count;
867 ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
873 static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
874 const struct panthor_fw_csg_control_iface *b)
876 if (a->features != b->features)
878 if (a->suspend_size != b->suspend_size)
880 if (a->protm_suspend_size != b->protm_suspend_size)
882 if (a->stream_num != b->stream_num)
887 static int panthor_init_csg_iface(struct panthor_device *ptdev,
888 unsigned int csg_idx)
890 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
891 struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
892 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
893 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
896 if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
899 spin_lock_init(&csg_iface->lock);
900 csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
901 csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
902 csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
904 if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
905 csg_iface->control->stream_num > MAX_CS_PER_CSG)
908 if (!csg_iface->input || !csg_iface->output) {
909 drm_err(&ptdev->base, "Invalid group control interface input/output VA");
914 struct panthor_fw_csg_iface *first_csg_iface =
915 panthor_fw_get_csg_iface(ptdev, 0);
917 if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
918 drm_err(&ptdev->base, "Expecting identical CSG slots");
923 for (i = 0; i < csg_iface->control->stream_num; i++) {
924 int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
933 static u32 panthor_get_instr_features(struct panthor_device *ptdev)
935 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
937 if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
940 return glb_iface->control->instr_features;
943 static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
945 struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
948 if (!ptdev->fw->shared_section->mem->kmap)
951 spin_lock_init(&glb_iface->lock);
952 glb_iface->control = ptdev->fw->shared_section->mem->kmap;
954 if (!glb_iface->control->version) {
955 drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
959 glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
960 glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
961 if (!glb_iface->input || !glb_iface->output) {
962 drm_err(&ptdev->base, "Invalid global control interface input/output VA");
966 if (glb_iface->control->group_num > MAX_CSGS ||
967 glb_iface->control->group_num < MIN_CSGS) {
968 drm_err(&ptdev->base, "Invalid number of control groups");
972 for (i = 0; i < glb_iface->control->group_num; i++) {
973 int ret = panthor_init_csg_iface(ptdev, i);
979 drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
980 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
981 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
982 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
983 glb_iface->control->features,
984 panthor_get_instr_features(ptdev));
988 static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
990 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
992 /* Enable all cores. */
993 glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;
996 glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
997 glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
998 glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);
1000 /* Enable interrupts we care about. */
1001 glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
1003 GLB_CFG_PROGRESS_TIMER |
1004 GLB_CFG_POWEROFF_TIMER |
1008 panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
1009 panthor_fw_toggle_reqs(glb_iface, req, ack,
1011 GLB_CFG_POWEROFF_TIMER |
1012 GLB_CFG_PROGRESS_TIMER);
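/* The FW treats a req bit that differs from the corresponding ack bit as a
 * pending request, so requests are posted by toggling req bits and are
 * considered acknowledged once the FW makes ack match req again (see
 * panthor_fw_wait_acks()).
 */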
1014 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1016 /* Kick the watchdog. */
1017 mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
1018 msecs_to_jiffies(PING_INTERVAL_MS));
1021 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
1023 if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
1024 ptdev->fw->booted = true;
1026 wake_up_all(&ptdev->fw->req_waitqueue);
1028 /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
1029 if (!ptdev->fw->booted)
1032 panthor_sched_report_fw_events(ptdev, status);
1034 PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
1036 static int panthor_fw_start(struct panthor_device *ptdev)
1038 bool timedout = false;
1040 ptdev->fw->booted = false;
1041 panthor_job_irq_resume(&ptdev->fw->irq, ~0);
1042 gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
1044 if (!wait_event_timeout(ptdev->fw->req_waitqueue,
1046 msecs_to_jiffies(1000))) {
1047 if (!ptdev->fw->booted &&
1048 !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
1053 static const char * const status_str[] = {
1054 [MCU_STATUS_DISABLED] = "disabled",
1055 [MCU_STATUS_ENABLED] = "enabled",
1056 [MCU_STATUS_HALT] = "halt",
1057 [MCU_STATUS_FATAL] = "fatal",
1059 u32 status = gpu_read(ptdev, MCU_STATUS);
1061 drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
1062 status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
1069 static void panthor_fw_stop(struct panthor_device *ptdev)
1073 gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
1074 if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1075 status == MCU_STATUS_DISABLED, 10, 100000))
1076 drm_err(&ptdev->base, "Failed to stop MCU");
1080 * panthor_fw_pre_reset() - Call before a reset.
1082 * @on_hang: true if the reset was triggered on a GPU hang.
1084 * If the reset is not triggered on a hang, we try to gracefully halt the
1085 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
1087 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
1089 /* Make sure we won't be woken up by a ping. */
1090 cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1092 ptdev->fw->fast_reset = false;
1095 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1098 panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
1099 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1100 if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1101 status == MCU_STATUS_HALT, 10, 100000) &&
1102 glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
1103 ptdev->fw->fast_reset = true;
1105 drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
1108 /* The FW detects 0 -> 1 transitions. Make sure we reset
1109 * the HALT bit before the FW is rebooted.
1111 panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
1114 panthor_job_irq_suspend(&ptdev->fw->irq);
1118 * panthor_fw_post_reset() - Call after a reset.
1121 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
1122 * make sure we can recover from a memory corruption.
1124 int panthor_fw_post_reset(struct panthor_device *ptdev)
1128 /* Make the MCU VM active. */
1129 ret = panthor_vm_active(ptdev->fw->vm);
1133 /* If this is a fast reset, try to start the MCU without reloading
1134 * the FW sections. If it fails, go for a full reset.
1136 if (ptdev->fw->fast_reset) {
1137 ret = panthor_fw_start(ptdev);
1141 /* Forcibly reset the MCU and force a slow reset, so we get a
1142 * fresh boot on the next panthor_fw_start() call.
1144 panthor_fw_stop(ptdev);
1145 ptdev->fw->fast_reset = false;
1146 drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
1148 ret = panthor_vm_flush_all(ptdev->fw->vm);
1150 drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)");
1155 /* Reload all sections, including RO ones. We're not supposed
1156 * to end up here anyway, let's just assume the overhead of
1157 * reloading everything is acceptable.
1159 panthor_reload_fw_sections(ptdev, true);
1161 ret = panthor_fw_start(ptdev);
1163 drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW)");
1168 /* We must re-initialize the global interface even on fast-reset. */
1169 panthor_fw_init_global_iface(ptdev);
1174 * panthor_fw_unplug() - Called when the device is unplugged.
1177 * This function must make sure all pending operations are flushed before
1178 * the driver releases device resources, thus preventing any interaction with
1181 * If any FW-related work is still running after this function returns,
1182 * it must use drm_dev_{enter,exit}() and skip any HW access when
1183 * drm_dev_enter() returns false.
1185 void panthor_fw_unplug(struct panthor_device *ptdev)
1187 struct panthor_fw_section *section;
1189 cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1191 /* Make sure the IRQ handler cannot be called after that point. */
1192 if (ptdev->fw->irq.irq)
1193 panthor_job_irq_suspend(&ptdev->fw->irq);
1195 panthor_fw_stop(ptdev);
1197 list_for_each_entry(section, &ptdev->fw->sections, node)
1198 panthor_kernel_bo_destroy(section->mem);
1200 /* We intentionally don't call panthor_vm_idle() and let
1201 * panthor_mmu_unplug() release the AS we acquired with
1202 * panthor_vm_active() so we don't have to track the VM active/idle
1203 * state to keep the active_refcnt balanced.
1205 panthor_vm_put(ptdev->fw->vm);
1206 ptdev->fw->vm = NULL;
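/* The L2 cache was powered on in panthor_fw_init(); power it back off now
 * that the MCU is stopped.
 */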
1208 panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
1212 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
1213 * @req_ptr: Pointer to the req register.
1214 * @ack_ptr: Pointer to the ack register.
1215 * @wq: Wait queue to use for the sleeping wait.
1216 * @req_mask: Mask of requests to wait for.
1217 * @acked: Pointer to field that's updated with the acked requests.
1218 * If the function returns 0, *acked == req_mask.
1219 * @timeout_ms: Timeout expressed in milliseconds.
1221 * Return: 0 on success, -ETIMEDOUT otherwise.
1223 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
1224 wait_queue_head_t *wq,
1225 u32 req_mask, u32 *acked,
1228 u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
1231 /* Busy wait for a few µsecs before falling back to a sleeping wait. */
1233 ret = read_poll_timeout_atomic(READ_ONCE, ack,
1234 (ack & req_mask) == req,
1240 if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
1241 msecs_to_jiffies(timeout_ms)))
1244 /* Check one last time, in case we were not woken up for some reason. */
1245 ack = READ_ONCE(*ack_ptr);
1246 if ((ack & req_mask) == req)
1249 *acked = ~(req ^ ack) & req_mask;
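/* ~(req ^ ack) has a bit set for every request whose ack already matches
 * req, i.e. the requests that did get acknowledged before the timeout.
 */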
1254 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
1256 * @req_mask: Mask of requests to wait for.
1257 * @acked: Pointer to field that's updated with the acked requests.
1258 * If the function returns 0, *acked == req_mask.
1259 * @timeout_ms: Timeout expressed in milliseconds.
1261 * Return: 0 on success, -ETIMEDOUT otherwise.
1263 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
1264 u32 req_mask, u32 *acked,
1267 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1269 /* GLB_HALT doesn't get acked through the FW interface. */
1270 if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
1273 return panthor_fw_wait_acks(&glb_iface->input->req,
1274 &glb_iface->output->ack,
1275 &ptdev->fw->req_waitqueue,
1276 req_mask, acked, timeout_ms);
1280 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
1282 * @csg_slot: CSG slot ID.
1283 * @req_mask: Mask of requests to wait for.
1284 * @acked: Pointer to field that's updated with the acked requests.
1285 * If the function returns 0, *acked == req_mask.
1286 * @timeout_ms: Timeout expressed in milliseconds.
1288 * Return: 0 on success, -ETIMEDOUT otherwise.
1290 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
1291 u32 req_mask, u32 *acked, u32 timeout_ms)
1293 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
1296 if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
1299 ret = panthor_fw_wait_acks(&csg_iface->input->req,
1300 &csg_iface->output->ack,
1301 &ptdev->fw->req_waitqueue,
1302 req_mask, acked, timeout_ms);
1305 * Check that all bits in the state field were updated; if there is any
1306 * mismatch, clear all bits in the state field. This allows code to do
1307 * (acked & CSG_STATE_MASK) and get the right value.
1310 if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
1311 *acked &= ~CSG_STATE_MASK;
1317 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
1319 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
1321 * This function toggles bits in doorbell_req and rings the global
1322 * doorbell. It doesn't require a user doorbell to be attached to
1325 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
1327 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1329 panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
1330 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1333 static void panthor_fw_ping_work(struct work_struct *work)
1335 struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
1336 struct panthor_device *ptdev = fw->irq.ptdev;
1337 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1341 if (panthor_device_reset_is_pending(ptdev))
1344 panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
1345 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1347 ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
1349 panthor_device_schedule_reset(ptdev);
1350 drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
1352 mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
1353 msecs_to_jiffies(PING_INTERVAL_MS));
1358 * panthor_fw_init() - Initialize FW related data.
1361 * Return: 0 on success, a negative error code otherwise.
1363 int panthor_fw_init(struct panthor_device *ptdev)
1365 struct panthor_fw *fw;
1368 fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
1373 init_waitqueue_head(&fw->req_waitqueue);
1374 INIT_LIST_HEAD(&fw->sections);
1375 INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
1377 irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
1381 ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
1383 drm_err(&ptdev->base, "failed to request job irq");
1387 ret = panthor_gpu_l2_power_on(ptdev);
1391 fw->vm = panthor_vm_create(ptdev, true,
1393 CSF_MCU_SHARED_REGION_START,
1394 CSF_MCU_SHARED_REGION_SIZE);
1395 if (IS_ERR(fw->vm)) {
1396 ret = PTR_ERR(fw->vm);
1401 ret = panthor_fw_load(ptdev);
1405 ret = panthor_vm_active(fw->vm);
1409 ret = panthor_fw_start(ptdev);
1413 ret = panthor_fw_init_ifaces(ptdev);
1417 panthor_fw_init_global_iface(ptdev);
1421 panthor_fw_unplug(ptdev);
1425 MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");