[linux.git] drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/firmware.h>
25 #include "amdgpu.h"
26 #include "amdgpu_vcn.h"
27 #include "amdgpu_pm.h"
28 #include "amdgpu_cs.h"
29 #include "soc15.h"
30 #include "soc15d.h"
31 #include "soc15_hw_ip.h"
32 #include "vcn_v2_0.h"
33 #include "mmsch_v4_0.h"
34 #include "vcn_v4_0.h"
35
36 #include "vcn/vcn_4_0_0_offset.h"
37 #include "vcn/vcn_4_0_0_sh_mask.h"
38 #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
39
40 #include <drm/drm_drv.h>
41
42 #define mmUVD_DPG_LMA_CTL                                                       regUVD_DPG_LMA_CTL
43 #define mmUVD_DPG_LMA_CTL_BASE_IDX                                              regUVD_DPG_LMA_CTL_BASE_IDX
44 #define mmUVD_DPG_LMA_DATA                                                      regUVD_DPG_LMA_DATA
45 #define mmUVD_DPG_LMA_DATA_BASE_IDX                                             regUVD_DPG_LMA_DATA_BASE_IDX
46
47 #define VCN_VID_SOC_ADDRESS_2_0                                                 0x1fb00
48 #define VCN1_VID_SOC_ADDRESS_3_0                                                0x48300
49
50 #define VCN_HARVEST_MMSCH                                                               0
51
52 #define RDECODE_MSG_CREATE                                                      0x00000000
53 #define RDECODE_MESSAGE_CREATE                                                  0x00000001
54
55 static int amdgpu_ih_clientid_vcns[] = {
56         SOC15_IH_CLIENTID_VCN,
57         SOC15_IH_CLIENTID_VCN1
58 };
59
60 static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
61 static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
62 static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
63 static int vcn_v4_0_set_powergating_state(void *handle,
64         enum amd_powergating_state state);
65 static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
66         int inst_idx, struct dpg_pause_state *new_state);
67 static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
68 static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);
69
70 /**
71  * vcn_v4_0_early_init - set function pointers and load microcode
72  *
73  * @handle: amdgpu_device pointer
74  *
75  * Set ring and irq function pointers
76  * Load microcode from filesystem
77  */
78 static int vcn_v4_0_early_init(void *handle)
79 {
80         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
81         int i;
82
83         if (amdgpu_sriov_vf(adev)) {
84                 adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
85                 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
86                         if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
87                                 adev->vcn.harvest_config |= 1 << i;
88                                 dev_info(adev->dev, "VCN%d is disabled by hypervisor\n", i);
89                         }
90                 }
91         }
92
93         /* re-use enc ring as unified ring */
94         adev->vcn.num_enc_rings = 1;
95
96         vcn_v4_0_set_unified_ring_funcs(adev);
97         vcn_v4_0_set_irq_funcs(adev);
98         vcn_v4_0_set_ras_funcs(adev);
99
100         return amdgpu_vcn_early_init(adev);
101 }
102
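/*
 * Initialize the per-instance fw_shared area: advertise the unified queue,
 * report the SMU DPM interface type (APU vs dGPU) and, on VCN 4.0.2, select
 * the DRM key injection workaround method for the firmware.
 */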
103 static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
104 {
105         volatile struct amdgpu_vcn4_fw_shared *fw_shared;
106
107         fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
108         fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
109         fw_shared->sq.is_enabled = 1;
110
111         fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
112         fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
113                 AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
114
115         if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
116             IP_VERSION(4, 0, 2)) {
117                 fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
118                 fw_shared->drm_key_wa.method =
119                         AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
120         }
121
122         if (amdgpu_vcnfw_log)
123                 amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
124
125         return 0;
126 }
127
128 /**
129  * vcn_v4_0_sw_init - sw init for VCN block
130  *
131  * @handle: amdgpu_device pointer
132  *
133  * Load firmware and sw initialization
134  */
135 static int vcn_v4_0_sw_init(void *handle)
136 {
137         struct amdgpu_ring *ring;
138         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
139         int i, r;
140
141         r = amdgpu_vcn_sw_init(adev);
142         if (r)
143                 return r;
144
145         amdgpu_vcn_setup_ucode(adev);
146
147         r = amdgpu_vcn_resume(adev);
148         if (r)
149                 return r;
150
151         for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
152                 if (adev->vcn.harvest_config & (1 << i))
153                         continue;
154
155                 /* Init instance 0 sched_score to 1, so it's scheduled after other instances */
156                 if (i == 0)
157                         atomic_set(&adev->vcn.inst[i].sched_score, 1);
158                 else
159                         atomic_set(&adev->vcn.inst[i].sched_score, 0);
160
161                 /* VCN UNIFIED TRAP */
162                 r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
163                                 VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
164                 if (r)
165                         return r;
166
167                 /* VCN POISON TRAP */
168                 r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
169                                 VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].ras_poison_irq);
170                 if (r)
171                         return r;
172
173                 ring = &adev->vcn.inst[i].ring_enc[0];
174                 ring->use_doorbell = true;
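                /* SR-IOV packs (num_enc_rings + 1) doorbell slots per VCN
                 * instance, while bare metal reserves 8 slots per instance.
                 */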
175                 if (amdgpu_sriov_vf(adev))
176                         ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
177                 else
178                         ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
179                 ring->vm_hub = AMDGPU_MMHUB0(0);
180                 sprintf(ring->name, "vcn_unified_%d", i);
181
182                 r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
183                                                 AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
184                 if (r)
185                         return r;
186
187                 vcn_v4_0_fw_shared_init(adev, i);
188         }
189
190         if (amdgpu_sriov_vf(adev)) {
191                 r = amdgpu_virt_alloc_mm_table(adev);
192                 if (r)
193                         return r;
194         }
195
196         if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
197                 adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
198
199         r = amdgpu_vcn_ras_sw_init(adev);
200         if (r)
201                 return r;
202
203         return 0;
204 }
205
206 /**
207  * vcn_v4_0_sw_fini - sw fini for VCN block
208  *
209  * @handle: amdgpu_device pointer
210  *
211  * VCN suspend and free up sw allocation
212  */
213 static int vcn_v4_0_sw_fini(void *handle)
214 {
215         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
216         int i, r, idx;
217
218         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
219                 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
220                         volatile struct amdgpu_vcn4_fw_shared *fw_shared;
221
222                         if (adev->vcn.harvest_config & (1 << i))
223                                 continue;
224
225                         fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
226                         fw_shared->present_flag_0 = 0;
227                         fw_shared->sq.is_enabled = 0;
228                 }
229
230                 drm_dev_exit(idx);
231         }
232
233         if (amdgpu_sriov_vf(adev))
234                 amdgpu_virt_free_mm_table(adev);
235
236         r = amdgpu_vcn_suspend(adev);
237         if (r)
238                 return r;
239
240         r = amdgpu_vcn_sw_fini(adev);
241
242         return r;
243 }
244
245 /**
246  * vcn_v4_0_hw_init - start and test VCN block
247  *
248  * @handle: amdgpu_device pointer
249  *
250  * Initialize the hardware, boot up the VCPU and do some testing
251  */
252 static int vcn_v4_0_hw_init(void *handle)
253 {
254         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
255         struct amdgpu_ring *ring;
256         int i, r;
257
258         if (amdgpu_sriov_vf(adev)) {
259                 r = vcn_v4_0_start_sriov(adev);
260                 if (r)
261                         goto done;
262
263                 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
264                         if (adev->vcn.harvest_config & (1 << i))
265                                 continue;
266
267                         ring = &adev->vcn.inst[i].ring_enc[0];
268                         ring->wptr = 0;
269                         ring->wptr_old = 0;
270                         vcn_v4_0_unified_ring_set_wptr(ring);
271                         ring->sched.ready = true;
272
273                 }
274         } else {
275                 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
276                         if (adev->vcn.harvest_config & (1 << i))
277                                 continue;
278
279                         ring = &adev->vcn.inst[i].ring_enc[0];
280
281                         adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
282                                         ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
283
284                         r = amdgpu_ring_test_helper(ring);
285                         if (r)
286                                 goto done;
287
288                 }
289         }
290
291         return 0;
292 done:
293         if (!r)
294                 DRM_INFO("VCN decode and encode initialized successfully (under %s).\n",
295                         (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? "DPG Mode" : "SPG Mode");
296
297         return r;
298 }
299
300 /**
301  * vcn_v4_0_hw_fini - stop the hardware block
302  *
303  * @handle: amdgpu_device pointer
304  *
305  * Stop the VCN block, mark ring as not ready any more
306  */
307 static int vcn_v4_0_hw_fini(void *handle)
308 {
309         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
310         int i;
311
312         cancel_delayed_work_sync(&adev->vcn.idle_work);
313
314         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
315                 if (adev->vcn.harvest_config & (1 << i))
316                         continue;
317                 if (!amdgpu_sriov_vf(adev)) {
318                         if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
319                             (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
320                              RREG32_SOC15(VCN, i, regUVD_STATUS))) {
321                                 vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
322                         }
323                 }
324                 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
325                         amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
326         }
327
328         return 0;
329 }
330
331 /**
332  * vcn_v4_0_suspend - suspend VCN block
333  *
334  * @handle: amdgpu_device pointer
335  *
336  * HW fini and suspend VCN block
337  */
338 static int vcn_v4_0_suspend(void *handle)
339 {
340         int r;
341         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
342
343         r = vcn_v4_0_hw_fini(adev);
344         if (r)
345                 return r;
346
347         r = amdgpu_vcn_suspend(adev);
348
349         return r;
350 }
351
352 /**
353  * vcn_v4_0_resume - resume VCN block
354  *
355  * @handle: amdgpu_device pointer
356  *
357  * Resume firmware and hw init VCN block
358  */
359 static int vcn_v4_0_resume(void *handle)
360 {
361         int r;
362         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
363
364         r = amdgpu_vcn_resume(adev);
365         if (r)
366                 return r;
367
368         r = vcn_v4_0_hw_init(adev);
369
370         return r;
371 }
372
373 /**
374  * vcn_v4_0_mc_resume - memory controller programming
375  *
376  * @adev: amdgpu_device pointer
377  * @inst: instance number
378  *
379  * Let the VCN memory controller know its offsets
380  */
381 static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
382 {
383         uint32_t offset, size;
384         const struct common_firmware_header *hdr;
385
386         hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data;
387         size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
388
389         /* cache window 0: fw */
390         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
391                 WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
392                         (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
393                 WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
394                         (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
395                 WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
396                 offset = 0;
397         } else {
398                 WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
399                         lower_32_bits(adev->vcn.inst[inst].gpu_addr));
400                 WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
401                         upper_32_bits(adev->vcn.inst[inst].gpu_addr));
402                 offset = size;
403                 WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
404         }
405         WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
406
407         /* cache window 1: stack */
408         WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
409                 lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
410         WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
411                 upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
412         WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
413         WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
414
415         /* cache window 2: context */
416         WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
417                 lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
418         WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
419                 upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
420         WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
421         WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
422
423         /* non-cache window */
424         WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
425                 lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
426         WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
427                 upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
428         WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
429         WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
430                 AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
431 }
432
433 /**
434  * vcn_v4_0_mc_resume_dpg_mode - memory controller programming for dpg mode
435  *
436  * @adev: amdgpu_device pointer
437  * @inst_idx: instance number index
438  * @indirect: indirectly write sram
439  *
440  * Let the VCN memory controller know its offsets with dpg mode
441  */
442 static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
443 {
444         uint32_t offset, size;
445         const struct common_firmware_header *hdr;
446         hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
447         size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
448
449         /* cache window 0: fw */
450         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
451                 if (!indirect) {
452                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
453                                 VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
454                                 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
455                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
456                                 VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
457                                 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
458                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
459                                 VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
460                 } else {
461                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
462                                 VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
463                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
464                                 VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
465                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
466                                 VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
467                 }
468                 offset = 0;
469         } else {
470                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
471                         VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
472                         lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
473                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
474                         VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
475                         upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
476                 offset = size;
477                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
478                         VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
479                         AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
480         }
481
482         if (!indirect)
483                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
484                         VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
485         else
486                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
487                         VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
488
489         /* cache window 1: stack */
490         if (!indirect) {
491                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
492                         VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
493                         lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
494                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
495                         VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
496                         upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
497                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
498                         VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
499         } else {
500                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
501                         VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
502                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
503                         VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
504                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
505                         VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
506         }
507         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
508                         VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
509
510         /* cache window 2: context */
511         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
512                         VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
513                         lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
514         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
515                         VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
516                         upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
517         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
518                         VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
519         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
520                         VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
521
522         /* non-cache window */
523         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
524                         VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
525                         lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
526         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
527                         VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
528                         upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
529         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
530                         VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
531         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
532                         VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
533                         AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
534
535         /* VCN global tiling registers */
536         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
537                 VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
538 }
539
540 /**
541  * vcn_v4_0_disable_static_power_gating - disable VCN static power gating
542  *
543  * @adev: amdgpu_device pointer
544  * @inst: instance number
545  *
546  * Disable static power gating for VCN block
547  */
548 static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
549 {
550         uint32_t data = 0;
551
552         if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
553                 data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
554                         | 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
555                         | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
556                         | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
557                         | 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
558                         | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
559                         | 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
560                         | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
561                         | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
562                         | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
563                         | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
564                         | 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
565                         | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
566                         | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
567
568                 WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
569                 SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS,
570                         UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
571         } else {
572                 uint32_t value;
573
574                 value = (inst) ? 0x2200800 : 0;
575                 data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
576                         | 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
577                         | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
578                         | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
579                         | 1 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
580                         | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
581                         | 1 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
582                         | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
583                         | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
584                         | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
585                         | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
586                         | 1 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
587                         | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
588                         | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
589
590                 WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
591                 SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, value,  0x3F3FFFFF);
592         }
593
594         data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
595         data &= ~0x103;
596         if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
597                 data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
598                         UVD_POWER_STATUS__UVD_PG_EN_MASK;
599
600         WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
601
602         return;
603 }
604
605 /**
606  * vcn_v4_0_enable_static_power_gating - enable VCN static power gating
607  *
608  * @adev: amdgpu_device pointer
609  * @inst: instance number
610  *
611  * Enable static power gating for VCN block
612  */
613 static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
614 {
615         uint32_t data;
616
617         if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
618                 /* Before power off, this indicator has to be turned on */
619                 data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
620                 data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
621                 data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
622                 WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
623
624                 data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
625                         | 2 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
626                         | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
627                         | 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
628                         | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
629                         | 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
630                         | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
631                         | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
632                         | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
633                         | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
634                         | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
635                         | 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
636                         | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
637                         | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
638                 WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
639
640                 data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
641                         | 2 << UVD_PGFSM_STATUS__UVDS_PWR_STATUS__SHIFT
642                         | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
643                         | 2 << UVD_PGFSM_STATUS__UVDTC_PWR_STATUS__SHIFT
644                         | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
645                         | 2 << UVD_PGFSM_STATUS__UVDTA_PWR_STATUS__SHIFT
646                         | 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
647                         | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
648                         | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
649                         | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
650                         | 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
651                         | 2 << UVD_PGFSM_STATUS__UVDTB_PWR_STATUS__SHIFT
652                         | 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
653                         | 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
654                 SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
655         }
656
657         return;
658 }
659
660 /**
661  * vcn_v4_0_disable_clock_gating - disable VCN clock gating
662  *
663  * @adev: amdgpu_device pointer
664  * @inst: instance number
665  *
666  * Disable clock gating for VCN block
667  */
668 static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
669 {
670         uint32_t data;
671
672         if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
673                 return;
674
675         /* VCN disable CGC */
676         data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
677         data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
678         data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
679         data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
680         WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
681
682         data = RREG32_SOC15(VCN, inst, regUVD_CGC_GATE);
683         data &= ~(UVD_CGC_GATE__SYS_MASK
684                 | UVD_CGC_GATE__UDEC_MASK
685                 | UVD_CGC_GATE__MPEG2_MASK
686                 | UVD_CGC_GATE__REGS_MASK
687                 | UVD_CGC_GATE__RBC_MASK
688                 | UVD_CGC_GATE__LMI_MC_MASK
689                 | UVD_CGC_GATE__LMI_UMC_MASK
690                 | UVD_CGC_GATE__IDCT_MASK
691                 | UVD_CGC_GATE__MPRD_MASK
692                 | UVD_CGC_GATE__MPC_MASK
693                 | UVD_CGC_GATE__LBSI_MASK
694                 | UVD_CGC_GATE__LRBBM_MASK
695                 | UVD_CGC_GATE__UDEC_RE_MASK
696                 | UVD_CGC_GATE__UDEC_CM_MASK
697                 | UVD_CGC_GATE__UDEC_IT_MASK
698                 | UVD_CGC_GATE__UDEC_DB_MASK
699                 | UVD_CGC_GATE__UDEC_MP_MASK
700                 | UVD_CGC_GATE__WCB_MASK
701                 | UVD_CGC_GATE__VCPU_MASK
702                 | UVD_CGC_GATE__MMSCH_MASK);
703
704         WREG32_SOC15(VCN, inst, regUVD_CGC_GATE, data);
705         SOC15_WAIT_ON_RREG(VCN, inst, regUVD_CGC_GATE, 0,  0xFFFFFFFF);
706
707         data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
708         data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
709                 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
710                 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
711                 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
712                 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
713                 | UVD_CGC_CTRL__SYS_MODE_MASK
714                 | UVD_CGC_CTRL__UDEC_MODE_MASK
715                 | UVD_CGC_CTRL__MPEG2_MODE_MASK
716                 | UVD_CGC_CTRL__REGS_MODE_MASK
717                 | UVD_CGC_CTRL__RBC_MODE_MASK
718                 | UVD_CGC_CTRL__LMI_MC_MODE_MASK
719                 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
720                 | UVD_CGC_CTRL__IDCT_MODE_MASK
721                 | UVD_CGC_CTRL__MPRD_MODE_MASK
722                 | UVD_CGC_CTRL__MPC_MODE_MASK
723                 | UVD_CGC_CTRL__LBSI_MODE_MASK
724                 | UVD_CGC_CTRL__LRBBM_MODE_MASK
725                 | UVD_CGC_CTRL__WCB_MODE_MASK
726                 | UVD_CGC_CTRL__VCPU_MODE_MASK
727                 | UVD_CGC_CTRL__MMSCH_MODE_MASK);
728         WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
729
730         data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE);
731         data |= (UVD_SUVD_CGC_GATE__SRE_MASK
732                 | UVD_SUVD_CGC_GATE__SIT_MASK
733                 | UVD_SUVD_CGC_GATE__SMP_MASK
734                 | UVD_SUVD_CGC_GATE__SCM_MASK
735                 | UVD_SUVD_CGC_GATE__SDB_MASK
736                 | UVD_SUVD_CGC_GATE__SRE_H264_MASK
737                 | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
738                 | UVD_SUVD_CGC_GATE__SIT_H264_MASK
739                 | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
740                 | UVD_SUVD_CGC_GATE__SCM_H264_MASK
741                 | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
742                 | UVD_SUVD_CGC_GATE__SDB_H264_MASK
743                 | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
744                 | UVD_SUVD_CGC_GATE__SCLR_MASK
745                 | UVD_SUVD_CGC_GATE__UVD_SC_MASK
746                 | UVD_SUVD_CGC_GATE__ENT_MASK
747                 | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
748                 | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
749                 | UVD_SUVD_CGC_GATE__SITE_MASK
750                 | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
751                 | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
752                 | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
753                 | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
754                 | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
755         WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE, data);
756
757         data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
758         data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
759                 | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
760                 | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
761                 | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
762                 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
763                 | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
764                 | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
765                 | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
766                 | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
767                 | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
768         WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
769 }
770
771 /**
772  * vcn_v4_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
773  *
774  * @adev: amdgpu_device pointer
775  * @sram_sel: sram select
776  * @inst_idx: instance number index
777  * @indirect: indirectly write sram
778  *
779  * Disable clock gating for VCN block with dpg mode
780  */
781 static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
782       int inst_idx, uint8_t indirect)
783 {
784         uint32_t reg_data = 0;
785
786         if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
787                 return;
788
789         /* enable sw clock gating control */
790         reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
791         reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
792         reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
793         reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
794                  UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
795                  UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
796                  UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
797                  UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
798                  UVD_CGC_CTRL__SYS_MODE_MASK |
799                  UVD_CGC_CTRL__UDEC_MODE_MASK |
800                  UVD_CGC_CTRL__MPEG2_MODE_MASK |
801                  UVD_CGC_CTRL__REGS_MODE_MASK |
802                  UVD_CGC_CTRL__RBC_MODE_MASK |
803                  UVD_CGC_CTRL__LMI_MC_MODE_MASK |
804                  UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
805                  UVD_CGC_CTRL__IDCT_MODE_MASK |
806                  UVD_CGC_CTRL__MPRD_MODE_MASK |
807                  UVD_CGC_CTRL__MPC_MODE_MASK |
808                  UVD_CGC_CTRL__LBSI_MODE_MASK |
809                  UVD_CGC_CTRL__LRBBM_MODE_MASK |
810                  UVD_CGC_CTRL__WCB_MODE_MASK |
811                  UVD_CGC_CTRL__VCPU_MODE_MASK);
812         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
813                 VCN, inst_idx, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
814
815         /* turn off clock gating */
816         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
817                 VCN, inst_idx, regUVD_CGC_GATE), 0, sram_sel, indirect);
818
819         /* turn on SUVD clock gating */
820         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
821                 VCN, inst_idx, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
822
823         /* turn on sw mode in UVD_SUVD_CGC_CTRL */
824         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
825                 VCN, inst_idx, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
826 }
827
828 /**
829  * vcn_v4_0_enable_clock_gating - enable VCN clock gating
830  *
831  * @adev: amdgpu_device pointer
832  * @inst: instance number
833  *
834  * Enable clock gating for VCN block
835  */
836 static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
837 {
838         uint32_t data;
839
840         if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
841                 return;
842
843         /* enable VCN CGC */
844         data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
845         data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
846         data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
847         data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
848         WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
849
850         data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
851         data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
852                 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
853                 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
854                 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
855                 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
856                 | UVD_CGC_CTRL__SYS_MODE_MASK
857                 | UVD_CGC_CTRL__UDEC_MODE_MASK
858                 | UVD_CGC_CTRL__MPEG2_MODE_MASK
859                 | UVD_CGC_CTRL__REGS_MODE_MASK
860                 | UVD_CGC_CTRL__RBC_MODE_MASK
861                 | UVD_CGC_CTRL__LMI_MC_MODE_MASK
862                 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
863                 | UVD_CGC_CTRL__IDCT_MODE_MASK
864                 | UVD_CGC_CTRL__MPRD_MODE_MASK
865                 | UVD_CGC_CTRL__MPC_MODE_MASK
866                 | UVD_CGC_CTRL__LBSI_MODE_MASK
867                 | UVD_CGC_CTRL__LRBBM_MODE_MASK
868                 | UVD_CGC_CTRL__WCB_MODE_MASK
869                 | UVD_CGC_CTRL__VCPU_MODE_MASK
870                 | UVD_CGC_CTRL__MMSCH_MODE_MASK);
871         WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
872
873         data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
874         data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
875                 | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
876                 | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
877                 | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
878                 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
879                 | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
880                 | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
881                 | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
882                 | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
883                 | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
884         WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
885 }
886
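/*
 * Route VCPU/VCODEC RAS interrupts to the host (rearm, IH, PMI and stall
 * enables) when RAS is supported; programmed through the DPG indirect path.
 */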
887 static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
888                                 bool indirect)
889 {
890         uint32_t tmp;
891
892         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
893                 return;
894
895         tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
896               VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
897               VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
898               VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
899         WREG32_SOC15_DPG_MODE(inst_idx,
900                               SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
901                               tmp, 0, indirect);
902
903         tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
904         WREG32_SOC15_DPG_MODE(inst_idx,
905                               SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
906                               tmp, 0, indirect);
907 }
908
909 /**
910  * vcn_v4_0_start_dpg_mode - VCN start with dpg mode
911  *
912  * @adev: amdgpu_device pointer
913  * @inst_idx: instance number index
914  * @indirect: indirectly write sram
915  *
916  * Start VCN block with dpg mode
917  */
918 static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
919 {
920         volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
921         struct amdgpu_ring *ring;
922         uint32_t tmp;
923
924         /* disable register anti-hang mechanism */
925         WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
926                 ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
927         /* enable dynamic power gating mode */
928         tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
929         tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
930         tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
931         WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);
932
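        /* When programming indirectly, stage register writes in the DPG scratch
         * SRAM buffer; they are committed via amdgpu_vcn_psp_update_sram() below.
         */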
933         if (indirect)
934                 adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
935
936         /* disable clock gating */
937         vcn_v4_0_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
938
939         /* enable VCPU clock */
940         tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
941         tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
942         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
943                 VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
944
945         /* disable master interrupt */
946         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
947                 VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);
948
949         /* setup regUVD_LMI_CTRL */
950         tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
951                 UVD_LMI_CTRL__REQ_MODE_MASK |
952                 UVD_LMI_CTRL__CRC_RESET_MASK |
953                 UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
954                 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
955                 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
956                 (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
957                 0x00100000L);
958         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
959                 VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
960
961         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
962                 VCN, inst_idx, regUVD_MPC_CNTL),
963                 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
964
965         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
966                 VCN, inst_idx, regUVD_MPC_SET_MUXA0),
967                 ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
968                  (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
969                  (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
970                  (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
971
972         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
973                 VCN, inst_idx, regUVD_MPC_SET_MUXB0),
974                  ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
975                  (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
976                  (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
977                  (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
978
979         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
980                 VCN, inst_idx, regUVD_MPC_SET_MUX),
981                 ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
982                  (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
983                  (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
984
985         vcn_v4_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
986
987         tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
988         tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
989         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
990                 VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
991
992         /* enable LMI MC and UMC channels */
993         tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
994         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
995                 VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
996
997         vcn_v4_0_enable_ras(adev, inst_idx, indirect);
998
999         /* enable master interrupt */
1000         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1001                 VCN, inst_idx, regUVD_MASTINT_EN),
1002                 UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
1003
1004
1005         if (indirect)
1006                 amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
1007
1008         ring = &adev->vcn.inst[inst_idx].ring_enc[0];
1009
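        /* Program the unified ring: set base/size, reset rptr/wptr with RB1
         * disabled, then re-enable RB1 and route its doorbell.
         */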
1010         WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
1011         WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1012         WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
1013
1014         tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
1015         tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
1016         WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
1017         fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
1018         WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
1019         WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
1020
1021         tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
1022         WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
1023         ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
1024
1025         tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
1026         tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
1027         WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
1028         fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
1029
1030         WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
1031                         ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
1032                         VCN_RB1_DB_CTRL__EN_MASK);
1033
1034         return 0;
1035 }
1036
1037
1038 /**
1039  * vcn_v4_0_start - VCN start
1040  *
1041  * @adev: amdgpu_device pointer
1042  *
1043  * Start VCN block
1044  */
1045 static int vcn_v4_0_start(struct amdgpu_device *adev)
1046 {
1047         volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1048         struct amdgpu_ring *ring;
1049         uint32_t tmp;
1050         int i, j, k, r;
1051
1052         if (adev->pm.dpm_enabled)
1053                 amdgpu_dpm_enable_uvd(adev, true);
1054
1055         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1056                 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1057
1058                 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1059                         r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
1060                         continue;
1061                 }
1062
1063                 /* disable VCN power gating */
1064                 vcn_v4_0_disable_static_power_gating(adev, i);
1065
1066                 /* set VCN status busy */
1067                 tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
1068                 WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
1069
1070                 /* disable clock gating */
1071                 vcn_v4_0_disable_clock_gating(adev, i);
1072
1073                 /* enable VCPU clock */
1074                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1075                                 UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
1076
1077                 /* disable master interrupt */
1078                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
1079                                 ~UVD_MASTINT_EN__VCPU_EN_MASK);
1080
1081                 /* enable LMI MC and UMC channels */
1082                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
1083                                 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1084
1085                 tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1086                 tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1087                 tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1088                 WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1089
1090                 /* setup regUVD_LMI_CTRL */
1091                 tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
1092                 WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
1093                                 UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
1094                                 UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1095                                 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1096                                 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
1097
1098                 /* setup regUVD_MPC_CNTL */
1099                 tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
1100                 tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
1101                 tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
1102                 WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
1103
1104                 /* setup UVD_MPC_SET_MUXA0 */
1105                 WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
1106                                 ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1107                                  (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1108                                  (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1109                                  (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
1110
1111                 /* setup UVD_MPC_SET_MUXB0 */
1112                 WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
1113                                 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1114                                  (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1115                                  (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1116                                  (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
1117
1118                 /* setup UVD_MPC_SET_MUX */
1119                 WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
1120                                 ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1121                                  (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1122                                  (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
1123
1124                 vcn_v4_0_mc_resume(adev, i);
1125
1126                 /* VCN global tiling registers */
1127                 WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
1128                                 adev->gfx.config.gb_addr_config);
1129
1130                 /* unblock VCPU register access */
1131                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
1132                                 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1133
1134                 /* release VCPU reset to boot */
1135                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1136                                 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1137
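                /* Wait for the VCPU to report ready (UVD_STATUS bit 1); on real
                 * hardware retry up to 10 times, resetting the VCPU block between
                 * attempts.
                 */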
1138                 for (j = 0; j < 10; ++j) {
1139                         uint32_t status;
1140
1141                         for (k = 0; k < 100; ++k) {
1142                                 status = RREG32_SOC15(VCN, i, regUVD_STATUS);
1143                                 if (status & 2)
1144                                         break;
1145                                 mdelay(10);
1146                                 if (amdgpu_emu_mode == 1)
1147                                         msleep(1);
1148                         }
1149
1150                         if (amdgpu_emu_mode == 1) {
1151                                 r = -1;
1152                                 if (status & 2) {
1153                                         r = 0;
1154                                         break;
1155                                 }
1156                         } else {
1157                                 r = 0;
1158                                 if (status & 2)
1159                                         break;
1160
1161                                 dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
1162                                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1163                                                         UVD_VCPU_CNTL__BLK_RST_MASK,
1164                                                         ~UVD_VCPU_CNTL__BLK_RST_MASK);
1165                                 mdelay(10);
1166                                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1167                                                 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1168
1169                                 mdelay(10);
1170                                 r = -1;
1171                         }
1172                 }
1173
1174                 if (r) {
1175                         dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
1176                         return r;
1177                 }
1178
1179                 /* enable master interrupt */
1180                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
1181                                 UVD_MASTINT_EN__VCPU_EN_MASK,
1182                                 ~UVD_MASTINT_EN__VCPU_EN_MASK);
1183
1184                 /* clear the busy bit of VCN_STATUS */
1185                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
1186                                 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1187
1188                 ring = &adev->vcn.inst[i].ring_enc[0];
1189                 WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
1190                                 ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
1191                                 VCN_RB1_DB_CTRL__EN_MASK);
1192
1193                 WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
1194                 WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1195                 WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
1196
1197                 tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
1198                 tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
1199                 WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
1200                 fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
1201                 WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
1202                 WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
1203
1204                 tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
1205                 WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
1206                 ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
1207
1208                 tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
1209                 tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
1210                 WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
1211                 fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
1212         }
1213
1214         return 0;
1215 }
1216
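/*
 * The RB metadata lives in the extra dwords allocated past the normal ring
 * contents (see .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata) in the
 * unified ring funcs below), i.e. the ring BO is laid out as
 *
 *   [ ring commands, ring_size bytes | amdgpu_vcn_rb_metadata ]
 *
 * It is only filled in for SR-IOV with RB decoupling, where the host MMSCH
 * presumably uses ring_id to associate the ring with its VCN instance.
 */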
1217 static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t vcn_inst, struct amdgpu_ring *ring_enc)
1218 {
1219         struct amdgpu_vcn_rb_metadata *rb_metadata = NULL;
1220         uint8_t *rb_ptr = (uint8_t *)ring_enc->ring;
1221
1222         rb_ptr += ring_enc->ring_size;
1223         rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr;
1224
1225         memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata));
1226         rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata);
1227         rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
1228         rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
1229         rb_metadata->version = 1;
1230         rb_metadata->ring_id = vcn_inst & 0xFF;
1231
1232         return 0;
1233 }
1234
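/*
 * Under SR-IOV the guest must not program the VCN registers directly at
 * start-up.  Instead, an MMSCH init table is built in the mm_table BO: a
 * mmsch_v4_0_init_header followed by one command table per VCN instance,
 * made of direct-write / read-modify-write packets covering the register
 * programming the bare-metal start path would otherwise do.  The table is
 * then handed to the MMSCH firmware through the MMSCH_VF_* mailbox
 * registers, and MMSCH applies it on the guest's behalf.
 */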
1235 static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
1236 {
1237         int i;
1238         struct amdgpu_ring *ring_enc;
1239         uint64_t cache_addr;
1240         uint64_t rb_enc_addr;
1241         uint64_t ctx_addr;
1242         uint32_t param, resp, expected;
1243         uint32_t offset, cache_size;
1244         uint32_t tmp, timeout;
1245
1246         struct amdgpu_mm_table *table = &adev->virt.mm_table;
1247         uint32_t *table_loc;
1248         uint32_t table_size;
1249         uint32_t size, size_dw;
1250         uint32_t init_status;
1251         uint32_t enabled_vcn;
1252
1253         struct mmsch_v4_0_cmd_direct_write
1254                 direct_wt = { {0} };
1255         struct mmsch_v4_0_cmd_direct_read_modify_write
1256                 direct_rd_mod_wt = { {0} };
1257         struct mmsch_v4_0_cmd_end end = { {0} };
1258         struct mmsch_v4_0_init_header header;
1259
1260         volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1261         volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
1262
1263         direct_wt.cmd_header.command_type =
1264                 MMSCH_COMMAND__DIRECT_REG_WRITE;
1265         direct_rd_mod_wt.cmd_header.command_type =
1266                 MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
1267         end.cmd_header.command_type =
1268                 MMSCH_COMMAND__END;
1269
1270         header.version = MMSCH_VERSION;
1271         header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
1272         for (i = 0; i < MMSCH_V4_0_VCN_INSTANCES; i++) {
1273                 header.inst[i].init_status = 0;
1274                 header.inst[i].table_offset = 0;
1275                 header.inst[i].table_size = 0;
1276         }
1277
1278         table_loc = (uint32_t *)table->cpu_addr;
1279         table_loc += header.total_size;
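        /* table layout in the mm_table BO (offsets and sizes in dwords):
         *   [ init_header | inst0 command table | inst1 command table | ... ]
         * header.total_size starts at the header size and grows as each
         * instance's table is appended; the MMSCH_V4_0_INSERT_* macros below
         * copy command packets to table_loc and advance table_loc/table_size.
         */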
1280         for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
1281                 if (adev->vcn.harvest_config & (1 << i))
1282                         continue;
1283
1284                 /* fw_shared must be (re)initialized at the beginning */
1285                 vcn_v4_0_fw_shared_init(adev, i);
1286
1287                 table_size = 0;
1288
1289                 MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
1290                         regUVD_STATUS),
1291                         ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
1292
1293                 cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
1294
1295                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1296                         MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1297                                 regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1298                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
1299                         MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1300                                 regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1301                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
1302                         offset = 0;
1303                         MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1304                                 regUVD_VCPU_CACHE_OFFSET0),
1305                                 0);
1306                 } else {
1307                         MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1308                                 regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1309                                 lower_32_bits(adev->vcn.inst[i].gpu_addr));
1310                         MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1311                                 regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1312                                 upper_32_bits(adev->vcn.inst[i].gpu_addr));
1313                         offset = cache_size;
1314                         MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1315                                 regUVD_VCPU_CACHE_OFFSET0),
1316                                 AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
1317                 }
1318
1319                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1320                         regUVD_VCPU_CACHE_SIZE0),
1321                         cache_size);
1322
1323                 cache_addr = adev->vcn.inst[i].gpu_addr + offset;
1324                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1325                         regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
1326                         lower_32_bits(cache_addr));
1327                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1328                         regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
1329                         upper_32_bits(cache_addr));
1330                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1331                         regUVD_VCPU_CACHE_OFFSET1),
1332                         0);
1333                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1334                         regUVD_VCPU_CACHE_SIZE1),
1335                         AMDGPU_VCN_STACK_SIZE);
1336
1337                 cache_addr = adev->vcn.inst[i].gpu_addr + offset +
1338                         AMDGPU_VCN_STACK_SIZE;
1339                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1340                         regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
1341                         lower_32_bits(cache_addr));
1342                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1343                         regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
1344                         upper_32_bits(cache_addr));
1345                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1346                         regUVD_VCPU_CACHE_OFFSET2),
1347                         0);
1348                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1349                         regUVD_VCPU_CACHE_SIZE2),
1350                         AMDGPU_VCN_CONTEXT_SIZE);
1351
1352                 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1353                 rb_setup = &fw_shared->rb_setup;
1354
1355                 ring_enc = &adev->vcn.inst[i].ring_enc[0];
1356                 ring_enc->wptr = 0;
1357                 rb_enc_addr = ring_enc->gpu_addr;
1358
1359                 rb_setup->is_rb_enabled_flags |= RB_ENABLED;
1360                 fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
1361
1362                 if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
1363                         vcn_v4_0_init_ring_metadata(adev, i, ring_enc);
1364
1365                         memset((void *)&rb_setup->rb_info, 0, sizeof(struct amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP);
1366                         if (!(adev->vcn.harvest_config & (1 << 0))) {
1367                                 rb_setup->rb_info[0].rb_addr_lo = lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
1368                                 rb_setup->rb_info[0].rb_addr_hi = upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
1369                                 rb_setup->rb_info[0].rb_size = adev->vcn.inst[0].ring_enc[0].ring_size / 4;
1370                         }
1371                         if (!(adev->vcn.harvest_config & (1 << 1))) {
1372                                 rb_setup->rb_info[2].rb_addr_lo = lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
1373                                 rb_setup->rb_info[2].rb_addr_hi = upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
1374                                 rb_setup->rb_info[2].rb_size = adev->vcn.inst[1].ring_enc[0].ring_size / 4;
1375                         }
1376                         fw_shared->decouple.is_enabled = 1;
1377                         fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
1378                 } else {
1379                         rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
1380                         rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
1381                         rb_setup->rb_size = ring_enc->ring_size / 4;
1382                 }
1383
1384                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1385                         regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
1386                         lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
1387                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1388                         regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
1389                         upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
1390                 MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1391                         regUVD_VCPU_NONCACHE_SIZE0),
1392                         AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
1393
1394                 /* add end packet */
1395                 MMSCH_V4_0_INSERT_END();
1396
1397                 /* refine header */
1398                 header.inst[i].init_status = 0;
1399                 header.inst[i].table_offset = header.total_size;
1400                 header.inst[i].table_size = table_size;
1401                 header.total_size += table_size;
1402         }
1403
1404         /* Update init table header in memory */
1405         size = sizeof(struct mmsch_v4_0_init_header);
1406         table_loc = (uint32_t *)table->cpu_addr;
1407         memcpy((void *)table_loc, &header, size);
1408
1409         /* message MMSCH (in VCN[0]) to initialize this client
1410          * 1, write the GPU MC address of the memory descriptor
1411          * to the mmsch_vf_ctx_addr_lo/hi registers
1412          */
1413         ctx_addr = table->gpu_addr;
1414         WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
1415         WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
1416
1417         /* 2, update vmid of descriptor */
1418         tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
1419         tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
1420         /* use domain0 for MM scheduler */
1421         tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
1422         WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
1423
1424         /* 3, notify mmsch about the size of this descriptor */
1425         size = header.total_size;
1426         WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
1427
1428         /* 4, set resp to zero */
1429         WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
1430
1431         /* 5, kick off the initialization and wait until
1432          * MMSCH_VF_MAILBOX_RESP becomes non-zero
1433          */
1434         param = 0x00000001;
1435         WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
1436         tmp = 0;
1437         timeout = 1000;
1438         resp = 0;
1439         expected = MMSCH_VF_MAILBOX_RESP__OK;
1440         while (resp != expected) {
1441                 resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
1442                 if (resp != 0)
1443                         break;
1444
1445                 udelay(10);
1446                 tmp = tmp + 10;
1447                 if (tmp >= timeout) {
1448                         DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
1449                                 " waiting for regMMSCH_VF_MAILBOX_RESP "\
1450                                 "(expected=0x%08x, readback=0x%08x)\n",
1451                                 tmp, expected, resp);
1452                         return -EBUSY;
1453                 }
1454         }
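        /* MMSCH is expected to write its per-engine init status back into the
         * header at the start of the table; cross-check it for the first
         * enabled instance in addition to the mailbox response.
         */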
1455         enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
1456         init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status;
1457         if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
1458         && init_status != MMSCH_VF_ENGINE_STATUS__PASS)
1459                 DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
1460                         "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
1461
1462         return 0;
1463 }
1464
1465 /**
1466  * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
1467  *
1468  * @adev: amdgpu_device pointer
1469  * @inst_idx: instance number index
1470  *
1471  * Stop VCN block with dpg mode
1472  */
1473 static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
1474 {
1475         struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
1476         uint32_t tmp;
1477
1478         vcn_v4_0_pause_dpg_mode(adev, inst_idx, &state);
1479         /* Wait for power status to be 1 */
1480         SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
1481                 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1482
1483         /* drain the ring: wait for the read pointer to catch up to the write pointer */
1484         tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
1485         SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1486
1487         SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
1488                 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1489
1490         /* disable dynamic power gating mode */
1491         WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
1492                 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1493 }
1494
1495 /**
1496  * vcn_v4_0_stop - VCN stop
1497  *
1498  * @adev: amdgpu_device pointer
1499  *
1500  * Stop VCN block
1501  */
1502 static int vcn_v4_0_stop(struct amdgpu_device *adev)
1503 {
1504         volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1505         uint32_t tmp;
1506         int i, r = 0;
1507
1508         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1509                 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
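                /* FW_QUEUE_DPG_HOLD_OFF asks the firmware to hold off DPG
                 * transitions while the block is being brought down
                 * (assumption based on the flag name; the flag is cleared
                 * again at the end of the start sequence above).
                 */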
1510                 fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
1511
1512                 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1513                         vcn_v4_0_stop_dpg_mode(adev, i);
1514                         continue;
1515                 }
1516
1517                 /* wait for vcn idle */
1518                 r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
1519                 if (r)
1520                         return r;
1521
1522                 tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1523                         UVD_LMI_STATUS__READ_CLEAN_MASK |
1524                         UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1525                         UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1526                 r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
1527                 if (r)
1528                         return r;
1529
1530                 /* disable LMI UMC channel */
1531                 tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
1532                 tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1533                 WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
1534                 tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
1535                         UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1536                 r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
1537                 if (r)
1538                         return r;
1539
1540                 /* block VCPU register access */
1541                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
1542                                 UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1543                                 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1544
1545                 /* reset VCPU */
1546                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1547                                 UVD_VCPU_CNTL__BLK_RST_MASK,
1548                                 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1549
1550                 /* disable VCPU clock */
1551                 WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1552                                 ~(UVD_VCPU_CNTL__CLK_EN_MASK));
1553
1554                 /* apply soft reset */
1555                 tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1556                 tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1557                 WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1558                 tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1559                 tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1560                 WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1561
1562                 /* clear status */
1563                 WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
1564
1565                 /* apply HW clock gating */
1566                 vcn_v4_0_enable_clock_gating(adev, i);
1567
1568                 /* enable VCN power gating */
1569                 vcn_v4_0_enable_static_power_gating(adev, i);
1570         }
1571
1572         if (adev->pm.dpm_enabled)
1573                 amdgpu_dpm_enable_uvd(adev, false);
1574
1575         return 0;
1576 }
1577
1578 /**
1579  * vcn_v4_0_pause_dpg_mode - VCN pause with dpg mode
1580  *
1581  * @adev: amdgpu_device pointer
1582  * @inst_idx: instance number index
1583  * @new_state: pause state
1584  *
1585  * Pause dpg mode for VCN block
1586  */
1587 static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
1588       struct dpg_pause_state *new_state)
1589 {
1590         uint32_t reg_data = 0;
1591         int ret_code;
1592
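        /* pausing is a request/ack handshake with the firmware: set
         * NJ_PAUSE_DPG_REQ and wait for NJ_PAUSE_DPG_ACK; unpausing only
         * clears the request bit and needs no acknowledgement.
         */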
1593         /* pause/unpause if state is changed */
1594         if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
1595                 DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d",
1596                         adev->vcn.inst[inst_idx].pause_state.fw_based,  new_state->fw_based);
1597                 reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) &
1598                         (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1599
1600                 if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
1601                         ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
1602                                 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1603
1604                         if (!ret_code) {
1605                                 /* pause DPG */
1606                                 reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1607                                 WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
1608
1609                                 /* wait for ACK */
1610                                 SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
1611                                         UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
1612                                         UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1613
1614                                 SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS,
1615                                         UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1616                         }
1617                 } else {
1618                         /* unpause dpg, no need to wait */
1619                         reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1620                         WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
1621                 }
1622                 adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
1623         }
1624
1625         return 0;
1626 }
1627
1628 /**
1629  * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
1630  *
1631  * @ring: amdgpu_ring pointer
1632  *
1633  * Returns the current hardware unified read pointer
1634  */
1635 static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
1636 {
1637         struct amdgpu_device *adev = ring->adev;
1638
1639         if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1640                 DRM_ERROR("wrong ring id is identified in %s", __func__);
1641
1642         return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
1643 }
1644
1645 /**
1646  * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
1647  *
1648  * @ring: amdgpu_ring pointer
1649  *
1650  * Returns the current hardware unified write pointer
1651  */
1652 static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
1653 {
1654         struct amdgpu_device *adev = ring->adev;
1655
1656         if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1657                 DRM_ERROR("wrong ring id is identified in %s", __func__);
1658
1659         if (ring->use_doorbell)
1660                 return *ring->wptr_cpu_addr;
1661         else
1662                 return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
1663 }
1664
1665 /**
1666  * vcn_v4_0_unified_ring_set_wptr - set enc write pointer
1667  *
1668  * @ring: amdgpu_ring pointer
1669  *
1670  * Commits the enc write pointer to the hardware
1671  */
1672 static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
1673 {
1674         struct amdgpu_device *adev = ring->adev;
1675
1676         if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1677                 DRM_ERROR("wrong ring id is identified in %s", __func__);
1678
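        /* in doorbell mode the new wptr is mirrored into the CPU-visible
         * shadow and then written to the doorbell, which delivers it to the
         * hardware directly.
         */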
1679         if (ring->use_doorbell) {
1680                 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1681                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1682         } else {
1683                 WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
1684         }
1685 }
1686
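/*
 * On VCN 4.0.x only the first VCN instance handles AV1, so jobs that create
 * an AV1 session (or any codec the second instance cannot run) are restricted
 * to instance 0's scheduler.  This must happen on the entity's very first IB
 * (fence_seq == 0), before any of its jobs could have been scheduled on a
 * different instance.
 */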
1687 static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
1688                                 struct amdgpu_job *job)
1689 {
1690         struct drm_gpu_scheduler **scheds;
1691
1692         /* The create msg must be in the first IB submitted */
1693         if (atomic_read(&job->base.entity->fence_seq))
1694                 return -EINVAL;
1695
1696         /* if VCN0 is harvested, we can't support AV1 */
1697         if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
1698                 return -EINVAL;
1699
1700         scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
1701                 [AMDGPU_RING_PRIO_0].sched;
1702         drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
1703         return 0;
1704 }
1705
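/*
 * Parse a decode message to decide whether the job must be pinned to VCN0.
 * Layout assumed by the parsing below (32-bit words):
 *   msg[1]  total message size in bytes
 *   msg[2]  number of buffer descriptors
 *   msg[3]  message type (RDECODE_MSG_CREATE)
 *   msg[6+] buffer descriptors, 4 dwords each: type (RDECODE_MESSAGE_CREATE),
 *           offset and size of the embedded create message, whose first dword
 *           is the codec type checked against 0x7/0x10/0x11 below.
 */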
1706 static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
1707                             uint64_t addr)
1708 {
1709         struct ttm_operation_ctx ctx = { false, false };
1710         struct amdgpu_bo_va_mapping *map;
1711         uint32_t *msg, num_buffers;
1712         struct amdgpu_bo *bo;
1713         uint64_t start, end;
1714         unsigned int i;
1715         void *ptr;
1716         int r;
1717
1718         addr &= AMDGPU_GMC_HOLE_MASK;
1719         r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
1720         if (r) {
1721                 DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
1722                 return r;
1723         }
1724
1725         start = map->start * AMDGPU_GPU_PAGE_SIZE;
1726         end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
1727         if (addr & 0x7) {
1728                 DRM_ERROR("VCN messages must be 8 byte aligned!\n");
1729                 return -EINVAL;
1730         }
1731
1732         bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
1733         amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
1734         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1735         if (r) {
1736                 DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
1737                 return r;
1738         }
1739
1740         r = amdgpu_bo_kmap(bo, &ptr);
1741         if (r) {
1742                 DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
1743                 return r;
1744         }
1745
1746         msg = ptr + addr - start;
1747
1748         /* Check length */
1749         if (msg[1] > end - addr) {
1750                 r = -EINVAL;
1751                 goto out;
1752         }
1753
1754         if (msg[3] != RDECODE_MSG_CREATE)
1755                 goto out;
1756
1757         num_buffers = msg[2];
1758         for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
1759                 uint32_t offset, size, *create;
1760
1761                 if (msg[0] != RDECODE_MESSAGE_CREATE)
1762                         continue;
1763
1764                 offset = msg[1];
1765                 size = msg[2];
1766
1767                 if (offset + size > end) {
1768                         r = -EINVAL;
1769                         goto out;
1770                 }
1771
1772                 create = ptr + addr + offset - start;
1773
1774                 /* H264, HEVC and VP9 can run on any instance */
1775                 if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
1776                         continue;
1777
1778                 r = vcn_v4_0_limit_sched(p, job);
1779                 if (r)
1780                         goto out;
1781         }
1782
1783 out:
1784         amdgpu_bo_kunmap(bo);
1785         return r;
1786 }
1787
1788 #define RADEON_VCN_ENGINE_TYPE_ENCODE                   (0x00000002)
1789 #define RADEON_VCN_ENGINE_TYPE_DECODE                   (0x00000003)
1790
1791 #define RADEON_VCN_ENGINE_INFO                          (0x30000001)
1792 #define RADEON_VCN_ENGINE_INFO_MAX_OFFSET               16
1793
1794 #define RENCODE_ENCODE_STANDARD_AV1                     2
1795 #define RENCODE_IB_PARAM_SESSION_INIT                   0x00000003
1796 #define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET        64
1797
1798 /* return the offset in the ib if the id is found, -1 otherwise;
1799  * to speed up the search we only scan up to max_offset
1800  */
1801 static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
1802 {
1803         int i;
1804
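        /* each package starts with its size in bytes (ptr[i]) followed by its
         * id (ptr[i + 1]); walk package by package, i.e. advance by size/4
         * dwords.
         */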
1805         for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
1806                 if (ib->ptr[i + 1] == id)
1807                         return i;
1808         }
1809         return -1;
1810 }
1811
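/*
 * Inspect an IB at submission time.  Work for the first instance is accepted
 * as-is; on other instances, decode jobs have their create message parsed and
 * encode jobs have their session-init parameters checked, and AV1 work is
 * rerouted to instance 0 via vcn_v4_0_limit_sched().
 */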
1812 static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
1813                                            struct amdgpu_job *job,
1814                                            struct amdgpu_ib *ib)
1815 {
1816         struct amdgpu_ring *ring = amdgpu_job_ring(job);
1817         struct amdgpu_vcn_decode_buffer *decode_buffer;
1818         uint64_t addr;
1819         uint32_t val;
1820         int idx;
1821
1822         /* The first instance can decode anything */
1823         if (!ring->me)
1824                 return 0;
1825
1826         /* RADEON_VCN_ENGINE_INFO is at the top of the ib block */
1827         idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
1828                         RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
1829         if (idx < 0) /* engine info is missing */
1830                 return 0;
1831
1832         val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
1833         if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
1834                 decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
1835
1836                 if (!(decode_buffer->valid_buf_flag  & 0x1))
1837                         return 0;
1838
1839                 addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
1840                         decode_buffer->msg_buffer_address_lo;
1841                 return vcn_v4_0_dec_msg(p, job, addr);
1842         } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
1843                 idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
1844                         RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
1845                 if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
1846                         return vcn_v4_0_limit_sched(p, job);
1847         }
1848         return 0;
1849 }
1850
1851 static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
1852         .type = AMDGPU_RING_TYPE_VCN_ENC,
1853         .align_mask = 0x3f,
1854         .nop = VCN_ENC_CMD_NO_OP,
1855         .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata),
1856         .get_rptr = vcn_v4_0_unified_ring_get_rptr,
1857         .get_wptr = vcn_v4_0_unified_ring_get_wptr,
1858         .set_wptr = vcn_v4_0_unified_ring_set_wptr,
1859         .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
1860         .emit_frame_size =
1861                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1862                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1863                 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
1864                 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
1865                 1, /* vcn_v2_0_enc_ring_insert_end */
1866         .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
1867         .emit_ib = vcn_v2_0_enc_ring_emit_ib,
1868         .emit_fence = vcn_v2_0_enc_ring_emit_fence,
1869         .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
1870         .test_ring = amdgpu_vcn_enc_ring_test_ring,
1871         .test_ib = amdgpu_vcn_unified_ring_test_ib,
1872         .insert_nop = amdgpu_ring_insert_nop,
1873         .insert_end = vcn_v2_0_enc_ring_insert_end,
1874         .pad_ib = amdgpu_ring_generic_pad_ib,
1875         .begin_use = amdgpu_vcn_ring_begin_use,
1876         .end_use = amdgpu_vcn_ring_end_use,
1877         .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
1878         .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
1879         .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1880 };
1881
1882 /**
1883  * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
1884  *
1885  * @adev: amdgpu_device pointer
1886  *
1887  * Set unified ring functions
1888  */
1889 static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
1890 {
1891         int i;
1892
1893         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1894                 if (adev->vcn.harvest_config & (1 << i))
1895                         continue;
1896
1897                 if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 2))
1898                         vcn_v4_0_unified_ring_vm_funcs.secure_submission_supported = true;
1899
1900                 adev->vcn.inst[i].ring_enc[0].funcs =
1901                        (const struct amdgpu_ring_funcs *)&vcn_v4_0_unified_ring_vm_funcs;
1902                 adev->vcn.inst[i].ring_enc[0].me = i;
1903
1904                 DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
1905         }
1906 }
1907
1908 /**
1909  * vcn_v4_0_is_idle - check VCN block is idle
1910  *
1911  * @handle: amdgpu_device pointer
1912  *
1913  * Check whether VCN block is idle
1914  */
1915 static bool vcn_v4_0_is_idle(void *handle)
1916 {
1917         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1918         int i, ret = 1;
1919
1920         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1921                 if (adev->vcn.harvest_config & (1 << i))
1922                         continue;
1923
1924                 ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE);
1925         }
1926
1927         return ret;
1928 }
1929
1930 /**
1931  * vcn_v4_0_wait_for_idle - wait for VCN block idle
1932  *
1933  * @handle: amdgpu_device pointer
1934  *
1935  * Wait for VCN block idle
1936  */
1937 static int vcn_v4_0_wait_for_idle(void *handle)
1938 {
1939         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1940         int i, ret = 0;
1941
1942         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1943                 if (adev->vcn.harvest_config & (1 << i))
1944                         continue;
1945
1946                 ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE,
1947                         UVD_STATUS__IDLE);
1948                 if (ret)
1949                         return ret;
1950         }
1951
1952         return ret;
1953 }
1954
1955 /**
1956  * vcn_v4_0_set_clockgating_state - set VCN block clockgating state
1957  *
1958  * @handle: amdgpu_device pointer
1959  * @state: clock gating state
1960  *
1961  * Set VCN block clockgating state
1962  */
1963 static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
1964 {
1965         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1966         bool enable = state == AMD_CG_STATE_GATE;
1967         int i;
1968
1969         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1970                 if (adev->vcn.harvest_config & (1 << i))
1971                         continue;
1972
1973                 if (enable) {
1974                         if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
1975                                 return -EBUSY;
1976                         vcn_v4_0_enable_clock_gating(adev, i);
1977                 } else {
1978                         vcn_v4_0_disable_clock_gating(adev, i);
1979                 }
1980         }
1981
1982         return 0;
1983 }
1984
1985 /**
1986  * vcn_v4_0_set_powergating_state - set VCN block powergating state
1987  *
1988  * @handle: amdgpu_device pointer
1989  * @state: power gating state
1990  *
1991  * Set VCN block powergating state
1992  */
1993 static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state)
1994 {
1995         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1996         int ret;
1997
1998         /* for SRIOV, the guest should not control VCN power-gating;
1999          * MMSCH FW controls both power-gating and clock-gating, so the
2000          * guest must avoid touching CGC and PG
2001          */
2002         if (amdgpu_sriov_vf(adev)) {
2003                 adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
2004                 return 0;
2005         }
2006
2007         if (state == adev->vcn.cur_state)
2008                 return 0;
2009
2010         if (state == AMD_PG_STATE_GATE)
2011                 ret = vcn_v4_0_stop(adev);
2012         else
2013                 ret = vcn_v4_0_start(adev);
2014
2015         if (!ret)
2016                 adev->vcn.cur_state = state;
2017
2018         return ret;
2019 }
2020
2021 /**
2022  * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state
2023  *
2024  * @adev: amdgpu_device pointer
2025  * @source: interrupt sources
2026  * @type: interrupt types
2027  * @state: interrupt states
2028  *
2029  * Set VCN block RAS interrupt state
2030  */
2031 static int vcn_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
2032         struct amdgpu_irq_src *source,
2033         unsigned int type,
2034         enum amdgpu_interrupt_state state)
2035 {
2036         return 0;
2037 }
2038
2039 /**
2040  * vcn_v4_0_process_interrupt - process VCN block interrupt
2041  *
2042  * @adev: amdgpu_device pointer
2043  * @source: interrupt sources
2044  * @entry: interrupt entry from clients and sources
2045  *
2046  * Process VCN block interrupt
2047  */
2048 static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
2049       struct amdgpu_iv_entry *entry)
2050 {
2051         uint32_t ip_instance;
2052
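        /* with SR-IOV RB decoupling the host encodes the VCN instance in the
         * IV ring_id; otherwise derive the instance from the IH client id.
         */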
2053         if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
2054                 ip_instance = entry->ring_id;
2055         } else {
2056                 switch (entry->client_id) {
2057                 case SOC15_IH_CLIENTID_VCN:
2058                         ip_instance = 0;
2059                         break;
2060                 case SOC15_IH_CLIENTID_VCN1:
2061                         ip_instance = 1;
2062                         break;
2063                 default:
2064                         DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
2065                         return 0;
2066                 }
2067         }
2068
2069         DRM_DEBUG("IH: VCN TRAP\n");
2070
2071         switch (entry->src_id) {
2072         case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
2073                 amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
2074                 break;
2075         default:
2076                 DRM_ERROR("Unhandled interrupt: %d %d\n",
2077                           entry->src_id, entry->src_data[0]);
2078                 break;
2079         }
2080
2081         return 0;
2082 }
2083
2084 static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
2085         .process = vcn_v4_0_process_interrupt,
2086 };
2087
2088 static const struct amdgpu_irq_src_funcs vcn_v4_0_ras_irq_funcs = {
2089         .set = vcn_v4_0_set_ras_interrupt_state,
2090         .process = amdgpu_vcn_process_poison_irq,
2091 };
2092
2093 /**
2094  * vcn_v4_0_set_irq_funcs - set VCN block interrupt irq functions
2095  *
2096  * @adev: amdgpu_device pointer
2097  *
2098  * Set VCN block interrupt irq functions
2099  */
2100 static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
2101 {
2102         int i;
2103
2104         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2105                 if (adev->vcn.harvest_config & (1 << i))
2106                         continue;
2107
2108                 adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
2109                 adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs;
2110
2111                 adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
2112                 adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v4_0_ras_irq_funcs;
2113         }
2114 }
2115
2116 static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
2117         .name = "vcn_v4_0",
2118         .early_init = vcn_v4_0_early_init,
2119         .late_init = NULL,
2120         .sw_init = vcn_v4_0_sw_init,
2121         .sw_fini = vcn_v4_0_sw_fini,
2122         .hw_init = vcn_v4_0_hw_init,
2123         .hw_fini = vcn_v4_0_hw_fini,
2124         .suspend = vcn_v4_0_suspend,
2125         .resume = vcn_v4_0_resume,
2126         .is_idle = vcn_v4_0_is_idle,
2127         .wait_for_idle = vcn_v4_0_wait_for_idle,
2128         .check_soft_reset = NULL,
2129         .pre_soft_reset = NULL,
2130         .soft_reset = NULL,
2131         .post_soft_reset = NULL,
2132         .set_clockgating_state = vcn_v4_0_set_clockgating_state,
2133         .set_powergating_state = vcn_v4_0_set_powergating_state,
2134         .dump_ip_state = NULL,
2135         .print_ip_state = NULL,
2136 };
2137
2138 const struct amdgpu_ip_block_version vcn_v4_0_ip_block = {
2139         .type = AMD_IP_BLOCK_TYPE_VCN,
2140         .major = 4,
2141         .minor = 0,
2142         .rev = 0,
2143         .funcs = &vcn_v4_0_ip_funcs,
2144 };
2145
2146 static uint32_t vcn_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
2147                         uint32_t instance, uint32_t sub_block)
2148 {
2149         uint32_t poison_stat = 0, reg_value = 0;
2150
2151         switch (sub_block) {
2152         case AMDGPU_VCN_V4_0_VCPU_VCODEC:
2153                 reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
2154                 poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
2155                 break;
2156         default:
2157                 break;
2158         }
2159
2160         if (poison_stat)
2161                 dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
2162                         instance, sub_block);
2163
2164         return poison_stat;
2165 }
2166
2167 static bool vcn_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
2168 {
2169         uint32_t inst, sub;
2170         uint32_t poison_stat = 0;
2171
2172         for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
2173                 for (sub = 0; sub < AMDGPU_VCN_V4_0_MAX_SUB_BLOCK; sub++)
2174                         poison_stat +=
2175                                 vcn_v4_0_query_poison_by_instance(adev, inst, sub);
2176
2177         return !!poison_stat;
2178 }
2179
2180 const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = {
2181         .query_poison_status = vcn_v4_0_query_ras_poison_status,
2182 };
2183
2184 static struct amdgpu_vcn_ras vcn_v4_0_ras = {
2185         .ras_block = {
2186                 .hw_ops = &vcn_v4_0_ras_hw_ops,
2187                 .ras_late_init = amdgpu_vcn_ras_late_init,
2188         },
2189 };
2190
2191 static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev)
2192 {
2193         switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
2194         case IP_VERSION(4, 0, 0):
2195                 adev->vcn.ras = &vcn_v4_0_ras;
2196                 break;
2197         default:
2198                 break;
2199         }
2200 }