2 * Copyright 2022 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/firmware.h>
24 #include <drm/drm_drv.h>
27 #include "amdgpu_ucode.h"
28 #include "amdgpu_vpe.h"
29 #include "soc15_common.h"
/* Stride multiplied by the slot index when vpe_get_csa_mc_addr() computes a CSA address */
32 #define AMDGPU_CSA_VPE_SIZE 64
33 /* VPE CSA resides in the 4th page of CSA */
34 #define AMDGPU_CSA_VPE_OFFSET (4096 * 3)
/* Forward declaration: the definition follows the vpe_ring_funcs table it installs */
36 static void vpe_set_ring_funcs(struct amdgpu_device *adev);
/*
 * amdgpu_vpe_psp_update_sram - ask PSP to run an IP firmware load for VPE.
 *
 * Builds an on-stack amdgpu_firmware_info tagged AMDGPU_UCODE_ID_VPE whose
 * MC address is the VPE command buffer GPU address, then passes it to
 * psp_execute_ip_fw_load().
 *
 * Returns whatever psp_execute_ip_fw_load() returns.
 */
38 int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
40 struct amdgpu_firmware_info ucode = {
41 .ucode_id = AMDGPU_UCODE_ID_VPE,
42 .mc_addr = adev->vpe.cmdbuf_gpu_addr,
46 return psp_execute_ip_fw_load(&adev->psp, &ucode);
/*
 * amdgpu_vpe_init_microcode - request the VPE firmware image and record it.
 *
 * The file name is "amdgpu/<prefix>.bin", where <prefix> is filled in by
 * amdgpu_ucode_ip_version_decode() for VPE_HWIP. The ucode version and the
 * feature version are read from the little-endian firmware header and cached
 * in adev->vpe. When the load type is AMDGPU_FW_LOAD_PSP, the same image is
 * registered under both the VPE_CTX and VPE_CTL ucode ids and
 * adev->firmware.fw_size grows by each part's page-aligned size.
 */
49 int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
51 struct amdgpu_device *adev = vpe->ring.adev;
52 const struct vpe_firmware_header_v1_0 *vpe_hdr;
53 char fw_prefix[32], fw_name[64];
56 amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
57 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", fw_prefix);
/* ret carries the outcome of the firmware request */
59 ret = amdgpu_ucode_request(adev, &adev->vpe.fw, fw_name);
/* the image data is interpreted as a vpe_firmware_header_v1_0 */
63 vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
64 adev->vpe.fw_version = le32_to_cpu(vpe_hdr->header.ucode_version);
65 adev->vpe.feature_version = le32_to_cpu(vpe_hdr->ucode_feature_version);
67 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
68 struct amdgpu_firmware_info *info;
/* context ucode entry */
70 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTX];
71 info->ucode_id = AMDGPU_UCODE_ID_VPE_CTX;
72 info->fw = adev->vpe.fw;
73 adev->firmware.fw_size +=
74 ALIGN(le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
/* control ucode entry; points at the same firmware blob */
76 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTL];
77 info->ucode_id = AMDGPU_UCODE_ID_VPE_CTL;
78 info->fw = adev->vpe.fw;
79 adev->firmware.fw_size +=
80 ALIGN(le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
/*
 * Failure handling: report the error and drop the firmware reference.
 * NOTE(review): presumably reached only when the request above fails, and
 * vpe_sw_fini() also calls release_firmware() on the same pointer - confirm
 * adev->vpe.fw is cleared here so it cannot be released twice.
 */
85 dev_err(adev->dev, "fail to initialize vpe microcode\n");
86 release_firmware(adev->vpe.fw);
/*
 * amdgpu_vpe_ring_init - configure the VPE ring and register it.
 *
 * Fills in the ring's static properties (no ring BO yet, doorbell enabled,
 * MMHUB0 instance 0 as VM hub, doorbell index derived from
 * adev->doorbell_index.vpe_ring, name "vpe") and then calls
 * amdgpu_ring_init() with the VPE trap interrupt source and default priority.
 */
91 int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe)
93 struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
94 struct amdgpu_ring *ring = &vpe->ring;
97 ring->ring_obj = NULL;
98 ring->use_doorbell = true;
99 ring->vm_hub = AMDGPU_MMHUB0(0);
/* the configured doorbell slot is doubled to form the ring's doorbell index */
100 ring->doorbell_index = (adev->doorbell_index.vpe_ring << 1);
/*
 * NOTE(review): the literal 4 is strlen("vpe") + 1; if ring->name is an
 * array, sizeof(ring->name) would avoid the magic number - confirm.
 */
101 snprintf(ring->name, 4, "vpe");
103 ret = amdgpu_ring_init(adev, ring, 1024, &vpe->trap_irq, 0,
104 AMDGPU_RING_PRIO_DEFAULT, NULL);
/* amdgpu_vpe_ring_fini - tear down the VPE ring via amdgpu_ring_fini(). */
111 int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe)
113 amdgpu_ring_fini(&vpe->ring);
/*
 * vpe_early_init - early_init hook of the VPE IP block.
 *
 * @handle: the amdgpu_device, passed as an opaque pointer.
 *
 * Selects the version-specific VPE callbacks from the discovered VPE_HWIP
 * version (6.1.0 maps to vpe_v6_1_set_funcs()) and installs the ring
 * function table with vpe_set_ring_funcs().
 */
118 static int vpe_early_init(void *handle)
120 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
121 struct amdgpu_vpe *vpe = &adev->vpe;
123 switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
124 case IP_VERSION(6, 1, 0):
125 vpe_v6_1_set_funcs(vpe);
131 vpe_set_ring_funcs(adev);
/*
 * vpe_common_init - allocate the VPE command buffer.
 *
 * Creates a kernel BO of PAGE_SIZE bytes, PAGE_SIZE aligned, in the GTT
 * domain. The BO handle, its GPU address and its CPU mapping are stored in
 * adev->vpe.cmdbuf_obj, cmdbuf_gpu_addr and cmdbuf_cpu_addr. An allocation
 * failure is logged together with the error code.
 */
138 static int vpe_common_init(struct amdgpu_vpe *vpe)
140 struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
143 r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
144 AMDGPU_GEM_DOMAIN_GTT,
145 &adev->vpe.cmdbuf_obj,
146 &adev->vpe.cmdbuf_gpu_addr,
147 (void **)&adev->vpe.cmdbuf_cpu_addr);
149 dev_err(adev->dev, "VPE: failed to allocate cmdbuf bo %d\n", r);
/*
 * vpe_sw_init - sw_init hook of the VPE IP block.
 *
 * @handle: the amdgpu_device, passed as an opaque pointer.
 *
 * Runs the software setup steps in this order: command buffer allocation
 * (vpe_common_init), interrupt setup (vpe_irq_init), ring setup
 * (vpe_ring_init) and firmware lookup (vpe_init_microcode). Each step stores
 * its status in ret.
 */
156 static int vpe_sw_init(void *handle)
158 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
159 struct amdgpu_vpe *vpe = &adev->vpe;
162 ret = vpe_common_init(vpe);
166 ret = vpe_irq_init(vpe);
170 ret = vpe_ring_init(vpe);
174 ret = vpe_init_microcode(vpe);
/*
 * vpe_sw_fini - sw_fini hook of the VPE IP block.
 *
 * @handle: the amdgpu_device, passed as an opaque pointer.
 *
 * Releases the firmware image and frees the command buffer BO that
 * vpe_common_init() allocated.
 */
181 static int vpe_sw_fini(void *handle)
183 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
184 struct amdgpu_vpe *vpe = &adev->vpe;
186 release_firmware(vpe->fw);
/* drops the BO and passes the stored GPU/CPU address fields for cleanup */
191 amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj,
192 &adev->vpe.cmdbuf_gpu_addr,
193 (void **)&adev->vpe.cmdbuf_cpu_addr);
/*
 * vpe_hw_init - hw_init hook of the VPE IP block; also used by vpe_resume().
 *
 * @handle: the amdgpu_device, passed as an opaque pointer.
 *
 * Loads the VPE microcode first and then starts the ring.
 */
198 static int vpe_hw_init(void *handle)
200 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
201 struct amdgpu_vpe *vpe = &adev->vpe;
204 ret = vpe_load_microcode(vpe);
208 ret = vpe_ring_start(vpe);
/*
 * vpe_hw_fini - hw_fini hook of the VPE IP block; also used by vpe_suspend().
 *
 * @handle: the amdgpu_device, passed as an opaque pointer.
 */
215 static int vpe_hw_fini(void *handle)
217 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
218 struct amdgpu_vpe *vpe = &adev->vpe;
/* vpe_suspend - suspend hook; simply delegates to vpe_hw_fini(). */
225 static int vpe_suspend(void *handle)
227 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
229 return vpe_hw_fini(adev);
/* vpe_resume - resume hook; simply delegates to vpe_hw_init(). */
232 static int vpe_resume(void *handle)
234 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
236 return vpe_hw_init(adev);
/*
 * vpe_ring_insert_nop - emit NOP dwords on the ring.
 *
 * @ring:  ring to write to.
 * @count: loop bound for the writes.
 *
 * Two NOP forms are written: the ring's NOP header with
 * VPE_CMD_NOP_HEADER_COUNT(count - 1) OR-ed in, and the plain
 * ring->funcs->nop value.
 * NOTE(review): presumably the counted header is emitted once, first, and
 * plain NOPs fill the rest - confirm against the selection logic.
 */
239 static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
243 for (i = 0; i < count; i++)
245 amdgpu_ring_write(ring, ring->funcs->nop |
246 VPE_CMD_NOP_HEADER_COUNT(count - 1));
248 amdgpu_ring_write(ring, ring->funcs->nop);
/*
 * vpe_get_csa_mc_addr - compute the CSA address used for an IB submission.
 *
 * @ring: ring the IB is emitted on.
 * @vmid: VMID of the job.
 *
 * SR-IOV virtual functions, VMID 0 and devices without adev->gfx.mcbp are
 * special-cased. Otherwise the address is the CSA virtual address plus
 * AMDGPU_CSA_VPE_OFFSET plus index * AMDGPU_CSA_VPE_SIZE.
 * NOTE(review): the special case presumably yields 0 (no CSA) - confirm.
 */
251 static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
253 struct amdgpu_device *adev = ring->adev;
255 uint64_t csa_mc_addr;
257 if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
260 csa_mc_addr = amdgpu_csa_vaddr(adev) + AMDGPU_CSA_VPE_OFFSET +
261 index * AMDGPU_CSA_VPE_SIZE;
/*
 * vpe_ring_emit_ib - emit an INDIRECT packet that points at an IB.
 *
 * @ring: ring to write to.
 * @job:  job the IB belongs to; supplies the VMID.
 * @ib:   indirect buffer to reference.
 *
 * Writes six dwords: the INDIRECT header carrying the low four VMID bits,
 * the low and high halves of the IB GPU address, the IB length in dwords,
 * and the low and high halves of the CSA address from vpe_get_csa_mc_addr().
 */
266 static void vpe_ring_emit_ib(struct amdgpu_ring *ring,
267 struct amdgpu_job *job,
268 struct amdgpu_ib *ib,
271 uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
272 uint64_t csa_mc_addr = vpe_get_csa_mc_addr(ring, vmid);
274 amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0) |
275 VPE_CMD_INDIRECT_HEADER_VMID(vmid & 0xf));
277 /* base must be 32 byte aligned */
/* the mask clears the low five address bits and keeps only the low 32 bits */
278 amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0);
279 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
280 amdgpu_ring_write(ring, ib->length_dw);
281 amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
282 amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
/*
 * vpe_ring_emit_fence - emit FENCE packet(s) and an optional TRAP.
 *
 * @ring:  ring to write to.
 * @addr:  GPU address the fence value is written to; a warning fires once if
 *         its two low bits are not zero.
 * @seq:   sequence number to write.
 * @flags: AMDGPU_FENCE_FLAG_64BIT requests a second pass that writes the
 *         upper half of @seq; AMDGPU_FENCE_FLAG_INT appends a TRAP packet.
 *
 * Each pass writes four dwords: header, address low, address high and one
 * 32-bit half of @seq (low half on the first pass, high half on the second).
 * NOTE(review): presumably @addr advances between the two passes - confirm.
 */
285 static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
286 uint64_t seq, unsigned int flags)
291 /* write the fence */
292 amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
293 /* zero in first two bits */
294 WARN_ON_ONCE(addr & 0x3);
295 amdgpu_ring_write(ring, lower_32_bits(addr));
296 amdgpu_ring_write(ring, upper_32_bits(addr));
297 amdgpu_ring_write(ring, i == 0 ? lower_32_bits(seq) : upper_32_bits(seq));
/* loop again only for 64-bit fences, and at most once more */
299 } while ((flags & AMDGPU_FENCE_FLAG_64BIT) && (i++ < 1));
301 if (flags & AMDGPU_FENCE_FLAG_INT) {
302 /* generate an interrupt */
303 amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_TRAP, 0));
304 amdgpu_ring_write(ring, 0);
/*
 * vpe_ring_emit_pipeline_sync - wait for the ring's latest fence value.
 *
 * Emits a six-dword POLL_REGMEM packet in memory mode that compares the
 * dword at the fence driver's GPU address for equality with the current
 * sync_seq, using a full 32-bit mask.
 */
309 static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
311 uint32_t seq = ring->fence_drv.sync_seq;
312 uint64_t addr = ring->fence_drv.gpu_addr;
315 amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
316 VPE_POLL_REGMEM_SUBOP_REGMEM) |
317 VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
318 VPE_CMD_POLL_REGMEM_HEADER_MEM(1));
/* low address dword with the two low bits cleared */
319 amdgpu_ring_write(ring, addr & 0xfffffffc);
320 amdgpu_ring_write(ring, upper_32_bits(addr));
321 amdgpu_ring_write(ring, seq); /* reference */
322 amdgpu_ring_write(ring, 0xffffffff); /* mask */
323 amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
324 VPE_CMD_POLL_REGMEM_DW5_INTERVAL(4));
/*
 * vpe_ring_emit_wreg - emit a three-dword REG_WRITE packet.
 *
 * @ring: ring to write to.
 * @reg:  register offset; emitted shifted left by two
 *        (presumably dword index to byte offset - confirm).
 * @val:  value to write.
 */
327 static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
329 amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0));
330 amdgpu_ring_write(ring, reg << 2);
331 amdgpu_ring_write(ring, val);
/*
 * vpe_ring_emit_reg_wait - emit a POLL_REGMEM packet that waits on a register.
 *
 * @ring: ring to write to.
 * @reg:  register offset; emitted shifted left by two.
 * @val:  reference value for the equality compare.
 * @mask: mask dword of the packet.
 *
 * Same six-dword layout as the pipeline-sync packet, but in register mode
 * (MEM field 0) with a zero upper address dword and a poll interval of 10.
 */
334 static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
335 uint32_t val, uint32_t mask)
337 amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
338 VPE_POLL_REGMEM_SUBOP_REGMEM) |
339 VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
340 VPE_CMD_POLL_REGMEM_HEADER_MEM(0));
341 amdgpu_ring_write(ring, reg << 2);
342 amdgpu_ring_write(ring, 0);
343 amdgpu_ring_write(ring, val); /* reference */
344 amdgpu_ring_write(ring, mask); /* mask */
345 amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
346 VPE_CMD_POLL_REGMEM_DW5_INTERVAL(10));
/*
 * vpe_ring_emit_vm_flush - emit a TLB flush for @vmid on this ring by
 * delegating to amdgpu_gmc_emit_flush_gpu_tlb().
 */
349 static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
352 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/*
 * vpe_ring_init_cond_exec - emit a COND_EXE packet with a placeholder count.
 *
 * Writes the header, the low and high halves of ring->cond_exe_gpu_addr and
 * the constant 1, then records the ring offset of the next dword before
 * filling it with the dummy value 0x55aa55aa. vpe_ring_patch_cond_exec()
 * later overwrites that dummy with the real dword count.
 */
355 static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring)
359 amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
360 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
361 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
362 amdgpu_ring_write(ring, 1);
363 ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
364 amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
/*
 * vpe_ring_patch_cond_exec - replace the COND_EXE placeholder with a count.
 *
 * @ring:   ring holding the packet.
 * @offset: ring offset of the placeholder dword, as recorded by
 *          vpe_ring_init_cond_exec().
 *
 * Warns once if @offset lies outside the ring or does not hold the dummy
 * value 0x55aa55aa. The stored count is the distance from @offset to the
 * last written dword; the second formula adds the ring size
 * (buf_mask + 1) for the wrapped case.
 * NOTE(review): presumably the first formula applies when cur > offset -
 * confirm against the branch condition.
 */
369 static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
373 WARN_ON_ONCE(offset > ring->buf_mask);
374 WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa);
/* index of the most recently written dword */
376 cur = (ring->wptr - 1) & ring->buf_mask;
378 ring->ring[offset] = cur - offset;
380 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
/*
 * vpe_ring_preempt_ib - preempt the IB currently running on the ring.
 *
 * Sequence: clear the preemption cond-exec flag, emit a trailing fence with
 * an incremented trail_seq, set the queue0 preempt register to 1, poll the
 * trailing fence value for up to adev->usec_timeout iterations, then set the
 * preempt register back to 0 and restore the cond-exec flag. A timeout is
 * reported with dev_err().
 */
383 static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
385 struct amdgpu_device *adev = ring->adev;
386 struct amdgpu_vpe *vpe = &adev->vpe;
387 uint32_t preempt_reg = vpe->regs.queue0_preempt;
390 /* assert preemption condition */
391 amdgpu_ring_set_preempt_cond_exec(ring, false);
393 /* emit the trailing fence */
394 ring->trail_seq += 1;
/* NOTE(review): the result of amdgpu_ring_alloc() is not checked here */
395 amdgpu_ring_alloc(ring, 10);
396 vpe_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, 0);
397 amdgpu_ring_commit(ring);
399 /* assert IB preemption */
400 WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 1);
402 /* poll the trailing fence */
403 for (i = 0; i < adev->usec_timeout; i++) {
404 if (ring->trail_seq ==
405 le32_to_cpu(*(ring->trail_fence_cpu_addr)))
/* the loop ran to its bound without seeing the fence value */
410 if (i >= adev->usec_timeout) {
412 dev_err(adev->dev, "ring %d failed to be preempted\n", ring->idx);
415 /* deassert IB preemption */
416 WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 0);
418 /* deassert the preemption condition */
419 amdgpu_ring_set_preempt_cond_exec(ring, true);
/*
 * vpe_set_clockgating_state - set_clockgating_state hook of the VPE IP block.
 *
 * @handle: opaque handle supplied by the IP-block framework.
 * @state:  requested clock-gating state.
 */
424 static int vpe_set_clockgating_state(void *handle,
425 enum amd_clockgating_state state)
/*
 * vpe_set_powergating_state - set_powergating_state hook of the VPE IP block.
 *
 * @handle: opaque handle supplied by the IP-block framework.
 * @state:  requested power-gating state.
 */
430 static int vpe_set_powergating_state(void *handle,
431 enum amd_powergating_state state)
/*
 * vpe_ring_get_rptr - fetch the ring read pointer.
 *
 * With doorbells enabled the 64-bit value is read atomically from
 * ring->rptr_cpu_addr. Otherwise it is assembled from the queue0 RB_RPTR_HI
 * and RB_RPTR_LO registers, high word first with the low word OR-ed in.
 * Both paths log the raw value; the messages say "before shift".
 */
436 static uint64_t vpe_ring_get_rptr(struct amdgpu_ring *ring)
438 struct amdgpu_device *adev = ring->adev;
439 struct amdgpu_vpe *vpe = &adev->vpe;
442 if (ring->use_doorbell) {
443 rptr = atomic64_read((atomic64_t *)ring->rptr_cpu_addr);
444 dev_dbg(adev->dev, "rptr/doorbell before shift == 0x%016llx\n", rptr);
/* register path */
446 rptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_hi));
448 rptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_lo));
449 dev_dbg(adev->dev, "rptr before shift [%i] == 0x%016llx\n", ring->me, rptr);
/*
 * vpe_ring_get_wptr - fetch the ring write pointer.
 *
 * With doorbells enabled the 64-bit value is read atomically from
 * ring->wptr_cpu_addr. Otherwise it is assembled from the queue0 RB_WPTR_HI
 * and RB_WPTR_LO registers, high word first with the low word OR-ed in.
 * Both paths log the raw value; the messages say "before shift".
 */
455 static uint64_t vpe_ring_get_wptr(struct amdgpu_ring *ring)
457 struct amdgpu_device *adev = ring->adev;
458 struct amdgpu_vpe *vpe = &adev->vpe;
461 if (ring->use_doorbell) {
462 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
463 dev_dbg(adev->dev, "wptr/doorbell before shift == 0x%016llx\n", wptr);
/* register path */
465 wptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi));
467 wptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo));
468 dev_dbg(adev->dev, "wptr before shift [%i] == 0x%016llx\n", ring->me, wptr);
/*
 * vpe_ring_set_wptr - publish the ring write pointer to the hardware.
 *
 * The value handed to the hardware is ring->wptr shifted left by two. With
 * doorbells it is stored atomically at ring->wptr_cpu_addr and then written
 * to the 64-bit doorbell; otherwise its low and high halves are written to
 * the queue0 RB_WPTR_LO and RB_WPTR_HI registers.
 *
 * NOTE(review): both dev_dbg() format strings are continued with
 * backslash-newline inside the string literal, so any indentation on the
 * continuation lines becomes part of the logged message. Adjacent string
 * literals would avoid that.
 */
474 static void vpe_ring_set_wptr(struct amdgpu_ring *ring)
476 struct amdgpu_device *adev = ring->adev;
477 struct amdgpu_vpe *vpe = &adev->vpe;
479 if (ring->use_doorbell) {
480 dev_dbg(adev->dev, "Using doorbell, \
481 wptr_offs == 0x%08x, \
482 lower_32_bits(ring->wptr) << 2 == 0x%08x, \
483 upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
485 lower_32_bits(ring->wptr << 2),
486 upper_32_bits(ring->wptr << 2));
/* update the CPU-visible copy first, then ring the doorbell */
487 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2);
488 WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
490 dev_dbg(adev->dev, "Not using doorbell, \
491 regVPEC_QUEUE0_RB_WPTR == 0x%08x, \
492 regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n",
493 lower_32_bits(ring->wptr << 2),
494 upper_32_bits(ring->wptr << 2));
/* register path: low half first, then high half */
495 WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo),
496 lower_32_bits(ring->wptr << 2));
497 WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi),
498 upper_32_bits(ring->wptr << 2));
/*
 * vpe_ring_test_ring - basic ring sanity test.
 *
 * Grabs a writeback slot, zeroes it, and submits a four-dword FENCE packet
 * directly on the ring that writes 0xdeadbeef to the slot's GPU address.
 * The slot is then polled for the pattern for up to adev->usec_timeout
 * iterations, and finally released.
 */
502 static int vpe_ring_test_ring(struct amdgpu_ring *ring)
504 struct amdgpu_device *adev = ring->adev;
505 const uint32_t test_pattern = 0xdeadbeef;
510 ret = amdgpu_device_wb_get(adev, &index);
512 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
/* clear the slot so a stale value cannot match the pattern */
516 adev->wb.wb[index] = 0;
/* GPU address of the slot: writeback base plus four times the index */
517 wb_addr = adev->wb.gpu_addr + (index * 4);
519 ret = amdgpu_ring_alloc(ring, 4);
521 dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, ret);
525 amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
526 amdgpu_ring_write(ring, lower_32_bits(wb_addr));
527 amdgpu_ring_write(ring, upper_32_bits(wb_addr));
528 amdgpu_ring_write(ring, test_pattern);
529 amdgpu_ring_commit(ring);
/* wait for the engine to write the pattern back */
531 for (i = 0; i < adev->usec_timeout; i++) {
532 if (le32_to_cpu(adev->wb.wb[index]) == test_pattern)
539 amdgpu_device_wb_free(adev, index);
/*
 * vpe_ring_test_ib - indirect-buffer sanity test.
 *
 * @ring:    ring to test.
 * @timeout: timeout handed to dma_fence_wait_timeout().
 *
 * Grabs a writeback slot, zeroes it, and builds an eight-dword IB from the
 * direct IB pool: a FENCE packet that writes 0xdeadbeef to the slot followed
 * by four NOP headers. The IB is scheduled, its fence is waited on, and the
 * slot is compared with the pattern (0 on match, -EINVAL otherwise). The IB
 * and the writeback slot are released before returning.
 */
544 static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
546 struct amdgpu_device *adev = ring->adev;
547 const uint32_t test_pattern = 0xdeadbeef;
548 struct amdgpu_ib ib = {};
549 struct dma_fence *f = NULL;
554 ret = amdgpu_device_wb_get(adev, &index);
556 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
/* clear the slot so a stale value cannot match the pattern */
560 adev->wb.wb[index] = 0;
561 wb_addr = adev->wb.gpu_addr + (index * 4);
563 ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
/* FENCE packet: header, address low, address high, value */
567 ib.ptr[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
568 ib.ptr[1] = lower_32_bits(wb_addr);
569 ib.ptr[2] = upper_32_bits(wb_addr);
570 ib.ptr[3] = test_pattern;
/* four NOP headers bring the IB to eight dwords */
571 ib.ptr[4] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
572 ib.ptr[5] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
573 ib.ptr[6] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
574 ib.ptr[7] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
577 ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
581 ret = dma_fence_wait_timeout(f, false, timeout);
/* keep a non-zero wait result as is; turn 0 into -ETIMEDOUT */
583 ret = ret ? : -ETIMEDOUT;
587 ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;
590 amdgpu_ib_free(adev, &ib, NULL);
593 amdgpu_device_wb_free(adev, index);
/*
 * Ring callback table for the VPE engine. vpe_set_ring_funcs() installs it
 * on adev->vpe.ring. The ring uses 64-bit pointers and the VPE NOP header as
 * its padding dword.
 */
598 static const struct amdgpu_ring_funcs vpe_ring_funcs = {
599 .type = AMDGPU_RING_TYPE_VPE,
601 .nop = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0),
602 .support_64bit_ptrs = true,
603 .get_rptr = vpe_ring_get_rptr,
604 .get_wptr = vpe_ring_get_wptr,
605 .set_wptr = vpe_ring_set_wptr,
/*
 * Dword budget, one term per packet emitter named on each line.
 * NOTE(review): presumably the value of .emit_frame_size - confirm.
 */
607 5 + /* vpe_ring_init_cond_exec */
608 6 + /* vpe_ring_emit_pipeline_sync */
609 10 + 10 + 10 + /* vpe_ring_emit_fence */
610 /* vpe_ring_emit_vm_flush */
611 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
612 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6,
613 .emit_ib_size = 7 + 6,
614 .emit_ib = vpe_ring_emit_ib,
615 .emit_pipeline_sync = vpe_ring_emit_pipeline_sync,
616 .emit_fence = vpe_ring_emit_fence,
617 .emit_vm_flush = vpe_ring_emit_vm_flush,
618 .emit_wreg = vpe_ring_emit_wreg,
619 .emit_reg_wait = vpe_ring_emit_reg_wait,
/* generic amdgpu helpers are used where no VPE-specific variant exists */
620 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
621 .insert_nop = vpe_ring_insert_nop,
622 .pad_ib = amdgpu_ring_generic_pad_ib,
623 .test_ring = vpe_ring_test_ring,
624 .test_ib = vpe_ring_test_ib,
625 .init_cond_exec = vpe_ring_init_cond_exec,
626 .patch_cond_exec = vpe_ring_patch_cond_exec,
627 .preempt_ib = vpe_ring_preempt_ib,
/* vpe_set_ring_funcs - point the VPE ring at the vpe_ring_funcs table. */
630 static void vpe_set_ring_funcs(struct amdgpu_device *adev)
632 adev->vpe.ring.funcs = &vpe_ring_funcs;
/*
 * IP-block callback table for VPE, referenced by vpe_v6_1_ip_block.
 * It wires the init/fini, suspend/resume and gating hooks defined in this
 * file.
 */
635 const struct amd_ip_funcs vpe_ip_funcs = {
637 .early_init = vpe_early_init,
639 .sw_init = vpe_sw_init,
640 .sw_fini = vpe_sw_fini,
641 .hw_init = vpe_hw_init,
642 .hw_fini = vpe_hw_fini,
643 .suspend = vpe_suspend,
644 .resume = vpe_resume,
646 .set_clockgating_state = vpe_set_clockgating_state,
647 .set_powergating_state = vpe_set_powergating_state,
/* IP block descriptor for VPE 6.1; binds the VPE block type to vpe_ip_funcs. */
650 const struct amdgpu_ip_block_version vpe_v6_1_ip_block = {
651 .type = AMD_IP_BLOCK_TYPE_VPE,
655 .funcs = &vpe_ip_funcs,