drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

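/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to boot
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports the firmware as loaded,
 * soft-resetting the ECPU between retries.
 * Returns 0 on success or -ETIMEDOUT if the firmware never comes up.
 */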
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

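/**
 * vce_v4_0_mmsch_start - hand the init descriptor table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the init descriptors
 *
 * Program the MMSCH VF registers with the GPU address, VMID and size
 * of the descriptor table, kick off initialization through the
 * mailbox and wait for the MMSCH to acknowledge completion.
 * Returns 0 on success or -EBUSY if the MMSCH does not respond.
 */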
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

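/**
 * vce_v4_0_sriov_start - build and submit the MMSCH init table
 *
 * @adev: amdgpu_device pointer
 *
 * Fill the VCE portion of the shared MM table with direct
 * write/read-modify-write/poll commands that mirror the register
 * programming done by vce_v4_0_start() and vce_v4_0_mc_resume(),
 * then submit the table via vce_v4_0_mmsch_start().
 * Returns -EINVAL if the VCE table was already populated.
 */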
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* begin of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->vce.gpu_addr >> 8);
		}

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset += size;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;

		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
	}

	return -EINVAL; /* already initialized? */
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

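/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in soft reset and clear the
 * BUSY flag in VCE_STATUS.
 */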
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

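/**
 * vce_v4_0_sw_init - software init for the VCE block
 *
 * @handle: amdgpu_device pointer cast to void
 *
 * Register the VCE interrupt source, allocate the VCE BO (sized for
 * the stack and data segments, plus the firmware when the PSP is not
 * loading it), initialize the rings (doorbell-based under SR-IOV)
 * and allocate the MM table used by the MMSCH.
 */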
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	}

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SR-IOV */
			ring->use_doorbell = true;
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
			else if (i == 1)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return 0;
}

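/**
 * vce_v4_0_sw_fini - software teardown for the VCE block
 *
 * @handle: amdgpu_device pointer cast to void
 *
 * Free the MM table, suspend VCE and release the resources allocated
 * in vce_v4_0_sw_init().
 */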
static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

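/**
 * vce_v4_0_hw_init - hardware init for the VCE block
 *
 * @handle: amdgpu_device pointer cast to void
 *
 * Start VCE (via the MMSCH under SR-IOV) and test all enabled rings.
 */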
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

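/**
 * vce_v4_0_hw_fini - hardware teardown for the VCE block
 *
 * @handle: amdgpu_device pointer cast to void
 *
 * Stop VCE and mark all rings as not ready.
 */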
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	/* vce_v4_0_wait_for_idle(handle); */
	vce_v4_0_stop(adev);
	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v4_0_hw_init(adev);
}

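/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Set clock gating defaults, program the LMI and the VCPU cache
 * offsets and sizes for the firmware, stack and data segments, then
 * enable the system interrupt.
 */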
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3-6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif

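/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id to use
 * @ctx_switch: unused for VCE
 *
 * Write the ring commands to execute the indirect buffer.
 */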
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

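/**
 * vce_v4_0_ring_emit_fence - emit a fence command
 *
 * @ring: amdgpu_ring pointer
 * @addr: GPU address to write the fence value to
 * @seq: sequence number to signal
 * @flags: fence flags; 64-bit fences are not supported here
 *
 * Write a fence and a trap command to the ring.
 */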
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

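/**
 * vce_v4_0_emit_vm_flush - flush the VM using the VCE ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush
 * @pd_addr: physical base address of the page directory
 *
 * Update the page table base for @vm_id in the VM hub, request a TLB
 * flush on the ring's invalidation engine and wait for it to complete.
 */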
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = pd_addr | 0x1; /* valid bit */
	/* now only use physical base address of PDE and valid */
	BUG_ON(pd_addr & 0xFFFF00000000003EULL);

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vm_id);
	amdgpu_ring_write(ring, 1 << vm_id);
}

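/**
 * vce_v4_0_set_interrupt_state - enable/disable the VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type (unused, there is only one)
 * @state: requested interrupt state
 *
 * Toggle the system interrupt trap enable in VCE_SYS_INT_EN.
 */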
static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

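/**
 * vce_v4_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledge the trap interrupt and signal the fence on the ring
 * identified by the source data.
 */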
static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
			VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
			~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};