/* drivers/gpu/drm/amd/amdgpu/vce_v4_0.c */

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

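/*
 * Bit 1 of VCE_STATUS, set by the VCPU firmware once it has booted
 * (see the VCE_STATUS bit layout sketched in vce_v4_0_check_soft_reset()).
 */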
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

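/*
 * Sizes of the three VCPU cache regions: the firmware image, the stack and
 * the data section (16 KiB per VCE handle plus 52 KiB).  The same sizes are
 * programmed into VCE_VCPU_CACHE_SIZE0/1/2 by vce_v4_0_mc_resume() and by
 * the SRIOV init table in vce_v4_0_sriov_start().
 */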
#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return *ring->wptr_cpu_addr;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

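	/*
	 * Doorbell mode (SRIOV only, see vce_v4_0_sw_init()): publish the
	 * write pointer through the doorbell rather than an MMIO register.
	 */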
	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

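	/*
	 * Each attempt polls VCE_STATUS 100 times with a 10 ms delay (~1 s)
	 * and then toggles an ECPU soft reset; with 10 attempts the worst
	 * case is roughly 10 seconds before giving up.
	 */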
	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

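/*
 * MMSCH v1.0 handshake used under SRIOV: publish the GPU address, VMID and
 * size of the init table, clear the response register, then kick the
 * scheduler via the host mailbox and poll for the response token.  The
 * 0x10000001/0x10000002 values below are assumed to be the MMSCH v1.0
 * request/ack tokens; they are not named in the headers included here.
 */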
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	*adev->vce.ring[0].wptr_cpu_addr = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

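/*
 * Build the MMSCH init table for VCE.  A virtual function cannot program
 * the ring and memory controller registers directly, so the register writes
 * are recorded into this table and replayed by the MMSCH on the VF's
 * behalf.  The table is only built once; the vce_table_offset/size check
 * below skips rebuilding it.
 */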
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* begin of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);
		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

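	/*
	 * Bit 0 is VCE_VCPU_CNTL__CLK_EN; the ~0x200001 mask also covers
	 * bit 21, which is presumably a VCPU enable bit (assumption: it is
	 * not named in the register headers used by this file).
	 */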
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

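	/* 167 == VCE_4_0__SRCID__VCE_TRAP in the ivsrcid header included above */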
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

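	/*
	 * With PSP loading, the firmware image lives in the PSP-managed TMR
	 * region; saved_bo only buffers the VCPU BO contents across
	 * suspend/resume (see vce_v4_0_suspend()/vce_v4_0_resume()).
	 */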
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);

		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only the first encode ring is used under
			 * SRIOV, so park the unused rings at a spare
			 * doorbell location.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
				     hw_prio, NULL);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			memcpy_fromio(adev->vce.saved_bo, ptr, size);
		}
		drm_dev_exit(idx);
	}

	/*
	 * Proper cleanups before halting the HW engine:
	 *   - cancel the delayed idle work
	 *   - enable powergating
	 *   - enable clockgating
	 *   - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			memcpy_toio(ptr, adev->vce.saved_bo, size);
			drm_dev_exit(idx);
		}
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

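/**
 * vce_v4_0_mc_resume - program VCE memory controller access
 *
 * @adev: amdgpu_device pointer
 *
 * Point the three VCPU cache regions at the firmware image, stack and data
 * sections and restore the LMI and clock gating defaults.
 */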
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

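	/*
	 * LMI (local memory interface) defaults; the 0x00398000 written to
	 * VCE_LMI_CTRL mirrors the value programmed through the MMSCH table
	 * in vce_v4_0_sriov_start().  The individual bits are not named in
	 * the headers used here.
	 */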
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bit 3 to bit 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

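	/*
	 * VCE fences are 32 bits (see the WARN_ON above): write the sequence
	 * number to the fence address, then let VCE_CMD_TRAP raise the
	 * interrupt that vce_v4_0_process_interrupt() turns into
	 * amdgpu_fence_process().
	 */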
	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

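/*
 * The reg << 2 in the wait/write packets below converts the dword register
 * index from SOC15_REG_OFFSET() into a byte offset, which is presumably
 * what the VCE command packets expect.
 */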
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
			       vmid * hub->ctx_addr_distance,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

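	/*
	 * Under SRIOV the interrupt enable is programmed through the MMSCH
	 * init table (see vce_v4_0_sriov_start()), so the VF must not touch
	 * VCE_SYS_INT_EN directly.
	 */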
	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};