[linux.git] drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34
35 #include "vega10/soc15ip.h"
36 #include "vega10/VCE/vce_4_0_offset.h"
37 #include "vega10/VCE/vce_4_0_default.h"
38 #include "vega10/VCE/vce_4_0_sh_mask.h"
39 #include "vega10/MMHUB/mmhub_1_0_offset.h"
40 #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
41
42 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02
43
44 #define VCE_V4_0_FW_SIZE        (384 * 1024)
45 #define VCE_V4_0_STACK_SIZE     (64 * 1024)
46 #define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47
48 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51
52 /**
53  * vce_v4_0_ring_get_rptr - get read pointer
54  *
55  * @ring: amdgpu_ring pointer
56  *
57  * Returns the current hardware read pointer
58  */
59 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60 {
61         struct amdgpu_device *adev = ring->adev;
62
63         if (ring == &adev->vce.ring[0])
64                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65         else if (ring == &adev->vce.ring[1])
66                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67         else
68                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69 }
70
71 /**
72  * vce_v4_0_ring_get_wptr - get write pointer
73  *
74  * @ring: amdgpu_ring pointer
75  *
76  * Returns the current hardware write pointer
77  */
78 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79 {
80         struct amdgpu_device *adev = ring->adev;
81
82         if (ring->use_doorbell)
83                 return adev->wb.wb[ring->wptr_offs];
84
85         if (ring == &adev->vce.ring[0])
86                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87         else if (ring == &adev->vce.ring[1])
88                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89         else
90                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91 }
92
93 /**
94  * vce_v4_0_ring_set_wptr - set write pointer
95  *
96  * @ring: amdgpu_ring pointer
97  *
98  * Commits the write pointer to the hardware
99  */
100 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101 {
102         struct amdgpu_device *adev = ring->adev;
103
104         if (ring->use_doorbell) {
105                 /* XXX check if swapping is necessary on BE */
106                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108                 return;
109         }
110
111         if (ring == &adev->vce.ring[0])
112                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113                         lower_32_bits(ring->wptr));
114         else if (ring == &adev->vce.ring[1])
115                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116                         lower_32_bits(ring->wptr));
117         else
118                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119                         lower_32_bits(ring->wptr));
120 }
121
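/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED flag, toggling the ECPU soft reset
 * between retries.  Returns 0 once the firmware reports ready, or
 * -ETIMEDOUT if it never does.
 */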
122 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
123 {
124         int i, j;
125
126         for (i = 0; i < 10; ++i) {
127                 for (j = 0; j < 100; ++j) {
128                         uint32_t status =
129                                 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
130
131                         if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
132                                 return 0;
133                         mdelay(10);
134                 }
135
136                 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
137                 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
138                                 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
139                                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
140                 mdelay(10);
141                 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
142                                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143                 mdelay(10);
144
145         }
146
147         return -ETIMEDOUT;
148 }
149
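/**
 * vce_v4_0_mmsch_start - hand the init table to the MM scheduler (SRIOV)
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the MMSCH init descriptor
 *
 * Program the descriptor address, VMID and size into the MMSCH VF
 * registers, kick off initialization via the mailbox and poll
 * VCE_MMSCH_VF_MAILBOX_RESP for the acknowledgement.
 */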
150 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
151                                 struct amdgpu_mm_table *table)
152 {
153         uint32_t data = 0, loop;
154         uint64_t addr = table->gpu_addr;
155         struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
156         uint32_t size;
157
158         size = header->header_size + header->vce_table_size + header->uvd_table_size;
159
160         /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
161         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
162         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
163
164         /* 2, update vmid of descriptor */
165         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
166         data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
167         data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
168         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
169
170         /* 3, notify mmsch about the size of this descriptor */
171         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
172
173         /* 4, set resp to zero */
174         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
175
176         /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
177         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
178
179         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
180         loop = 1000;
181         while ((data & 0x10000002) != 0x10000002) {
182                 udelay(10);
183                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
184                 loop--;
185                 if (!loop)
186                         break;
187         }
188
189         if (!loop) {
190                 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
191                 return -EBUSY;
192         }
193         WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
194
195         return 0;
196 }
197
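/**
 * vce_v4_0_sriov_start - start VCE through the MM scheduler (SRIOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table for ring 0 (ring registers, MC_RESUME
 * programming, VCPU start and the end packet) and submit it with
 * vce_v4_0_mmsch_start() instead of writing the registers directly.
 */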
198 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
199 {
200         struct amdgpu_ring *ring;
201         uint32_t offset, size;
202         uint32_t table_size = 0;
203         struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
204         struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
205         struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
206         struct mmsch_v1_0_cmd_end end = { { 0 } };
207         uint32_t *init_table = adev->virt.mm_table.cpu_addr;
208         struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
209
210         direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
211         direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
212         direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
213         end.cmd_header.command_type = MMSCH_COMMAND__END;
214
215         if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
216                 header->version = MMSCH_VERSION;
217                 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
218
219                 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
220                         header->vce_table_offset = header->header_size;
221                 else
222                         header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
223
224                 init_table += header->vce_table_offset;
225
226                 ring = &adev->vce.ring[0];
227                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
228                                             lower_32_bits(ring->gpu_addr));
229                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
230                                             upper_32_bits(ring->gpu_addr));
231                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
232                                             ring->ring_size / 4);
233
234                 /* begin of MC_RESUME */
235                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
236                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
237                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
238                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
239                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
240
241                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
242                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
243                                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
244                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
245                                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
246                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
247                                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
248                 } else {
249                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
250                                                 adev->vce.gpu_addr >> 8);
251                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
252                                                 adev->vce.gpu_addr >> 8);
253                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
254                                                 adev->vce.gpu_addr >> 8);
255                 }
256
257                 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
258                 size = VCE_V4_0_FW_SIZE;
259                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
260                                             offset & 0x7FFFFFFF);
261                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
262
263                 offset += size;
264                 size = VCE_V4_0_STACK_SIZE;
265                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
266                                             offset & 0x7FFFFFFF);
267                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
268
269                 offset += size;
270                 size = VCE_V4_0_DATA_SIZE;
271                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
272                                             offset & 0x7FFFFFFF);
273                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
274
275                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
276                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
277                                                    0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
278
279                 /* end of MC_RESUME */
280                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
281                                                    VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
282                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
283                                                    ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
284                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
285                                                    ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
286
287                 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
288                                               VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
289                                               VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
290
291                 /* clear BUSY flag */
292                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
293                                                    ~VCE_STATUS__JOB_BUSY_MASK, 0);
294
295                 /* add end packet */
296                 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
297                 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
298                 header->vce_table_size = table_size;
299
300                 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
301         }
302
303         return -EINVAL; /* already initialized? */
304 }
305
306 /**
307  * vce_v4_0_start - start VCE block
308  *
309  * @adev: amdgpu_device pointer
310  *
311  * Setup and start the VCE block
312  */
313 static int vce_v4_0_start(struct amdgpu_device *adev)
314 {
315         struct amdgpu_ring *ring;
316         int r;
317
318         ring = &adev->vce.ring[0];
319
320         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
321         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
322         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
323         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
324         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
325
326         ring = &adev->vce.ring[1];
327
328         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
329         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
330         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
331         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
332         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
333
334         ring = &adev->vce.ring[2];
335
336         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
337         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
338         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
339         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
340         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
341
342         vce_v4_0_mc_resume(adev);
343         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
344                         ~VCE_STATUS__JOB_BUSY_MASK);
345
346         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
347
348         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
349                         ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
350         mdelay(100);
351
352         r = vce_v4_0_firmware_loaded(adev);
353
354         /* clear BUSY flag */
355         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
356
357         if (r) {
358                 DRM_ERROR("VCE not responding, giving up!!!\n");
359                 return r;
360         }
361
362         return 0;
363 }
364
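/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in soft reset and clear the
 * BUSY flag in VCE_STATUS.
 */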
365 static int vce_v4_0_stop(struct amdgpu_device *adev)
366 {
367
368         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
369
370         /* hold on ECPU */
371         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
372                         VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
373                         ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
374
375         /* clear BUSY flag */
376         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
377
378         /* Set Clock-Gating off */
379         /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
380                 vce_v4_0_set_vce_sw_clock_gating(adev, false);
381         */
382
383         return 0;
384 }
385
386 static int vce_v4_0_early_init(void *handle)
387 {
388         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
389
390         if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
391                 adev->vce.num_rings = 1;
392         else
393                 adev->vce.num_rings = 3;
394
395         vce_v4_0_set_ring_funcs(adev);
396         vce_v4_0_set_irq_funcs(adev);
397
398         return 0;
399 }
400
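/**
 * vce_v4_0_sw_init - software init for VCE
 *
 * @handle: handle to the amdgpu_device
 *
 * Register the VCE interrupt source, allocate the firmware, stack and
 * data BO, hand the ucode to the PSP loader when PSP loading is used,
 * set up the rings (doorbell based under SRIOV) and allocate the MM
 * table used by the MM scheduler.
 */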
401 static int vce_v4_0_sw_init(void *handle)
402 {
403         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
404         struct amdgpu_ring *ring;
405         unsigned size;
406         int r, i;
407
408         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
409         if (r)
410                 return r;
411
412         size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
413         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
414                 size += VCE_V4_0_FW_SIZE;
415
416         r = amdgpu_vce_sw_init(adev, size);
417         if (r)
418                 return r;
419
420         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
421                 const struct common_firmware_header *hdr;
422                 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
423                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
424                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
425                 adev->firmware.fw_size +=
426                         ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
427                 DRM_INFO("PSP loading VCE firmware\n");
428         }
429
430         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
431                 r = amdgpu_vce_resume(adev);
432                 if (r)
433                         return r;
434         }
435
436         for (i = 0; i < adev->vce.num_rings; i++) {
437                 ring = &adev->vce.ring[i];
438                 sprintf(ring->name, "vce%d", i);
439                 if (amdgpu_sriov_vf(adev)) {
440                         /* DOORBELL only works under SRIOV */
441                         ring->use_doorbell = true;
442                         if (i == 0)
443                                 ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
444                         else if (i == 1)
445                                 ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
446                         else
447                                 ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
448                 }
449                 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
450                 if (r)
451                         return r;
452         }
453
454         r = amdgpu_virt_alloc_mm_table(adev);
455         if (r)
456                 return r;
457
458         return r;
459 }
460
461 static int vce_v4_0_sw_fini(void *handle)
462 {
463         int r;
464         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
465
466         /* free MM table */
467         amdgpu_virt_free_mm_table(adev);
468
469         r = amdgpu_vce_suspend(adev);
470         if (r)
471                 return r;
472
473         return amdgpu_vce_sw_fini(adev);
474 }
475
476 static int vce_v4_0_hw_init(void *handle)
477 {
478         int r, i;
479         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
480
481         if (amdgpu_sriov_vf(adev))
482                 r = vce_v4_0_sriov_start(adev);
483         else
484                 r = vce_v4_0_start(adev);
485         if (r)
486                 return r;
487
488         for (i = 0; i < adev->vce.num_rings; i++)
489                 adev->vce.ring[i].ready = false;
490
491         for (i = 0; i < adev->vce.num_rings; i++) {
492                 r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
493                 if (r)
494                         return r;
495                 else
496                         adev->vce.ring[i].ready = true;
497         }
498
499         DRM_INFO("VCE initialized successfully.\n");
500
501         return 0;
502 }
503
504 static int vce_v4_0_hw_fini(void *handle)
505 {
506         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
507         int i;
508
509         if (!amdgpu_sriov_vf(adev)) {
510                 /* vce_v4_0_wait_for_idle(handle); */
511                 vce_v4_0_stop(adev);
512         } else {
513                 /* full access mode, so don't touch any VCE register */
514                 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
515         }
516
517         for (i = 0; i < adev->vce.num_rings; i++)
518                 adev->vce.ring[i].ready = false;
519
520         return 0;
521 }
522
523 static int vce_v4_0_suspend(void *handle)
524 {
525         int r;
526         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
527
528         r = vce_v4_0_hw_fini(adev);
529         if (r)
530                 return r;
531
532         return amdgpu_vce_suspend(adev);
533 }
534
535 static int vce_v4_0_resume(void *handle)
536 {
537         int r;
538         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
539
540         r = amdgpu_vce_resume(adev);
541         if (r)
542                 return r;
543
544         return vce_v4_0_hw_init(adev);
545 }
546
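/**
 * vce_v4_0_mc_resume - program the VCE memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Set up clock gating, the LMI interface and the VCPU cache windows
 * (firmware, stack and data segments), then enable the VCE system
 * interrupt.
 */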
547 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
548 {
549         uint32_t offset, size;
550
551         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
552         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
553         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
554         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
555
556         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
557         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
558         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
559         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
560         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
561
562         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
563                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
564                         (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
565                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
566                         (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
567         } else {
568                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
569                         (adev->vce.gpu_addr >> 8));
570                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
571                         (adev->vce.gpu_addr >> 40) & 0xff);
572         }
573
574         offset = AMDGPU_VCE_FIRMWARE_OFFSET;
575         size = VCE_V4_0_FW_SIZE;
576         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
577         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
578
579         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
580         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
581         offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
582         size = VCE_V4_0_STACK_SIZE;
583         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
584         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
585
586         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
587         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
588         offset += size;
589         size = VCE_V4_0_DATA_SIZE;
590         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
591         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
592
593         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
594         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
595                         VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
596                         ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
597 }
598
599 static int vce_v4_0_set_clockgating_state(void *handle,
600                                           enum amd_clockgating_state state)
601 {
602         /* needed for driver unload */
603         return 0;
604 }
605
606 #if 0
607 static bool vce_v4_0_is_idle(void *handle)
608 {
609         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
610         u32 mask = 0;
611
612         mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
613         mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
614
615         return !(RREG32(mmSRBM_STATUS2) & mask);
616 }
617
618 static int vce_v4_0_wait_for_idle(void *handle)
619 {
620         unsigned i;
621         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
622
623         for (i = 0; i < adev->usec_timeout; i++)
624                 if (vce_v4_0_is_idle(handle))
625                         return 0;
626
627         return -ETIMEDOUT;
628 }
629
630 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
631 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
632 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
633 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
634                                       VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
635
636 static bool vce_v4_0_check_soft_reset(void *handle)
637 {
638         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
639         u32 srbm_soft_reset = 0;
640
641         /* According to the VCE team, we should use VCE_STATUS instead of the
642          * SRBM_STATUS.VCE_BUSY bit for busy status checking.
643          * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
644          * instance's registers are accessed
645          * (0 for the 1st instance, 0x10 for the 2nd instance).
646          *
647          * VCE_STATUS
648          * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
649          * |----+----+-----------+----+----+----+----------+---------+----|
650          * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
651          *
652          * The VCE team suggests using bit 3 to bit 6 for the busy status check.
653          */
654         mutex_lock(&adev->grbm_idx_mutex);
655         WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
656         if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
657                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
658                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
659         }
660         WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
661         if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
662                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
663                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
664         }
665         WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
666         mutex_unlock(&adev->grbm_idx_mutex);
667
668         if (srbm_soft_reset) {
669                 adev->vce.srbm_soft_reset = srbm_soft_reset;
670                 return true;
671         } else {
672                 adev->vce.srbm_soft_reset = 0;
673                 return false;
674         }
675 }
676
677 static int vce_v4_0_soft_reset(void *handle)
678 {
679         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
680         u32 srbm_soft_reset;
681
682         if (!adev->vce.srbm_soft_reset)
683                 return 0;
684         srbm_soft_reset = adev->vce.srbm_soft_reset;
685
686         if (srbm_soft_reset) {
687                 u32 tmp;
688
689                 tmp = RREG32(mmSRBM_SOFT_RESET);
690                 tmp |= srbm_soft_reset;
691                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
692                 WREG32(mmSRBM_SOFT_RESET, tmp);
693                 tmp = RREG32(mmSRBM_SOFT_RESET);
694
695                 udelay(50);
696
697                 tmp &= ~srbm_soft_reset;
698                 WREG32(mmSRBM_SOFT_RESET, tmp);
699                 tmp = RREG32(mmSRBM_SOFT_RESET);
700
701                 /* Wait a little for things to settle down */
702                 udelay(50);
703         }
704
705         return 0;
706 }
707
708 static int vce_v4_0_pre_soft_reset(void *handle)
709 {
710         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
711
712         if (!adev->vce.srbm_soft_reset)
713                 return 0;
714
715         mdelay(5);
716
717         return vce_v4_0_suspend(adev);
718 }
719
720
721 static int vce_v4_0_post_soft_reset(void *handle)
722 {
723         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
724
725         if (!adev->vce.srbm_soft_reset)
726                 return 0;
727
728         mdelay(5);
729
730         return vce_v4_0_resume(adev);
731 }
732
733 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
734 {
735         u32 tmp, data;
736
737         tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
738         if (override)
739                 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
740         else
741                 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
742
743         if (tmp != data)
744                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
745 }
746
747 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
748                                              bool gated)
749 {
750         u32 data;
751
752         /* Set Override to disable Clock Gating */
753         vce_v4_0_override_vce_clock_gating(adev, true);
754
755         /* This function enables MGCG which is controlled by firmware.
756            With the clocks in the gated state the core is still
757            accessible but the firmware will throttle the clocks on the
758            fly as necessary.
759         */
760         if (gated) {
761                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
762                 data |= 0x1ff;
763                 data &= ~0xef0000;
764                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
765
766                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
767                 data |= 0x3ff000;
768                 data &= ~0xffc00000;
769                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
770
771                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
772                 data |= 0x2;
773                 data &= ~0x00010000;
774                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
775
776                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
777                 data |= 0x37f;
778                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
779
780                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
781                 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
782                         VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
783                         VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
784                         0x8;
785                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
786         } else {
787                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
788                 data &= ~0x80010;
789                 data |= 0xe70008;
790                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
791
792                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
793                 data |= 0xffc00000;
794                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
795
796                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
797                 data |= 0x10000;
798                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
799
800                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
801                 data &= ~0xffc00000;
802                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
803
804                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
805                 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
806                           VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
807                           VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
808                           0x8);
809                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
810         }
811         vce_v4_0_override_vce_clock_gating(adev, false);
812 }
813
814 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
815 {
816         u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
817
818         if (enable)
819                 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
820         else
821                 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
822
823         WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
824 }
825
826 static int vce_v4_0_set_clockgating_state(void *handle,
827                                           enum amd_clockgating_state state)
828 {
829         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
830         bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
831         int i;
832
833         if ((adev->asic_type == CHIP_POLARIS10) ||
834                 (adev->asic_type == CHIP_TONGA) ||
835                 (adev->asic_type == CHIP_FIJI))
836                 vce_v4_0_set_bypass_mode(adev, enable);
837
838         if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
839                 return 0;
840
841         mutex_lock(&adev->grbm_idx_mutex);
842         for (i = 0; i < 2; i++) {
843                 /* Program VCE Instance 0 or 1 if not harvested */
844                 if (adev->vce.harvest_config & (1 << i))
845                         continue;
846
847                 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
848
849                 if (enable) {
850                         /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
851                         uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
852                         data &= ~(0xf | 0xff0);
853                         data |= ((0x0 << 0) | (0x04 << 4));
854                         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
855
856                         /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
857                         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
858                         data &= ~(0xf | 0xff0);
859                         data |= ((0x0 << 0) | (0x04 << 4));
860                         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
861                 }
862
863                 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
864         }
865
866         WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
867         mutex_unlock(&adev->grbm_idx_mutex);
868
869         return 0;
870 }
871
872 static int vce_v4_0_set_powergating_state(void *handle,
873                                           enum amd_powergating_state state)
874 {
875         /* This doesn't actually powergate the VCE block.
876          * That's done in the dpm code via the SMC.  This
877          * just re-inits the block as necessary.  The actual
878          * gating still happens in the dpm code.  We should
879          * revisit this when there is a cleaner line between
880          * the smc and the hw blocks
881          */
882         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
883
884         if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
885                 return 0;
886
887         if (state == AMD_PG_STATE_GATE)
888                 /* XXX do we need a vce_v4_0_stop()? */
889                 return 0;
890         else
891                 return vce_v4_0_start(adev);
892 }
893 #endif
894
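/**
 * vce_v4_0_ring_emit_ib - emit an indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to schedule
 * @vm_id: VM ID the IB runs under
 * @ctx_switch: unused by VCE
 *
 * Emit a VCE_CMD_IB_VM packet with the VM ID, IB address and length.
 */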
895 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
896                 struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
897 {
898         amdgpu_ring_write(ring, VCE_CMD_IB_VM);
899         amdgpu_ring_write(ring, vm_id);
900         amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
901         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
902         amdgpu_ring_write(ring, ib->length_dw);
903 }
904
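/**
 * vce_v4_0_ring_emit_fence - emit a fence command
 *
 * @ring: amdgpu_ring pointer
 * @addr: address to write the sequence number to
 * @seq: sequence number to write
 * @flags: fence flags
 *
 * Emit a VCE_CMD_FENCE packet with the address and sequence number,
 * followed by VCE_CMD_TRAP to raise the trap interrupt.
 */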
905 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
906                         u64 seq, unsigned flags)
907 {
908         WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
909
910         amdgpu_ring_write(ring, VCE_CMD_FENCE);
911         amdgpu_ring_write(ring, addr);
912         amdgpu_ring_write(ring, upper_32_bits(addr));
913         amdgpu_ring_write(ring, seq);
914         amdgpu_ring_write(ring, VCE_CMD_TRAP);
915 }
916
917 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
918 {
919         amdgpu_ring_write(ring, VCE_CMD_END);
920 }
921
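/**
 * vce_v4_0_emit_vm_flush - emit a VM page table update and TLB flush
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM ID to flush
 * @pd_addr: new page directory address
 *
 * Write the page directory base for @vm_id into the VM hub registers,
 * request a TLB invalidation on the ring's invalidation engine and wait
 * for the acknowledgement, all via VCE register write/wait packets.
 */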
922 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
923                          unsigned int vm_id, uint64_t pd_addr)
924 {
925         struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
926         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
927         unsigned eng = ring->vm_inv_eng;
928
929         pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
930         pd_addr |= AMDGPU_PTE_VALID;
931
932         amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
933         amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
934         amdgpu_ring_write(ring, upper_32_bits(pd_addr));
935
936         amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
937         amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
938         amdgpu_ring_write(ring, lower_32_bits(pd_addr));
939
940         amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
941         amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
942         amdgpu_ring_write(ring, 0xffffffff);
943         amdgpu_ring_write(ring, lower_32_bits(pd_addr));
944
945         /* flush TLB */
946         amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
947         amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
948         amdgpu_ring_write(ring, req);
949
950         /* wait for flush */
951         amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
952         amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
953         amdgpu_ring_write(ring, 1 << vm_id);
954         amdgpu_ring_write(ring, 1 << vm_id);
955 }
956
957 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
958                                         struct amdgpu_irq_src *source,
959                                         unsigned type,
960                                         enum amdgpu_interrupt_state state)
961 {
962         uint32_t val = 0;
963
964         if (state == AMDGPU_IRQ_STATE_ENABLE)
965                 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
966
967         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
968                         ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
969         return 0;
970 }
971
972 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
973                                       struct amdgpu_irq_src *source,
974                                       struct amdgpu_iv_entry *entry)
975 {
976         DRM_DEBUG("IH: VCE\n");
977
978         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
979                         VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
980                         ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
981
982         switch (entry->src_data[0]) {
983         case 0:
984         case 1:
985         case 2:
986                 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
987                 break;
988         default:
989                 DRM_ERROR("Unhandled interrupt: %d %d\n",
990                           entry->src_id, entry->src_data[0]);
991                 break;
992         }
993
994         return 0;
995 }
996
997 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
998         .name = "vce_v4_0",
999         .early_init = vce_v4_0_early_init,
1000         .late_init = NULL,
1001         .sw_init = vce_v4_0_sw_init,
1002         .sw_fini = vce_v4_0_sw_fini,
1003         .hw_init = vce_v4_0_hw_init,
1004         .hw_fini = vce_v4_0_hw_fini,
1005         .suspend = vce_v4_0_suspend,
1006         .resume = vce_v4_0_resume,
1007         .is_idle = NULL /* vce_v4_0_is_idle */,
1008         .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1009         .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1010         .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1011         .soft_reset = NULL /* vce_v4_0_soft_reset */,
1012         .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1013         .set_clockgating_state = vce_v4_0_set_clockgating_state,
1014         .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1015 };
1016
1017 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1018         .type = AMDGPU_RING_TYPE_VCE,
1019         .align_mask = 0x3f,
1020         .nop = VCE_CMD_NO_OP,
1021         .support_64bit_ptrs = false,
1022         .vmhub = AMDGPU_MMHUB,
1023         .get_rptr = vce_v4_0_ring_get_rptr,
1024         .get_wptr = vce_v4_0_ring_get_wptr,
1025         .set_wptr = vce_v4_0_ring_set_wptr,
1026         .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1027         .emit_frame_size =
1028                 17 + /* vce_v4_0_emit_vm_flush */
1029                 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1030                 1, /* vce_v4_0_ring_insert_end */
1031         .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1032         .emit_ib = vce_v4_0_ring_emit_ib,
1033         .emit_vm_flush = vce_v4_0_emit_vm_flush,
1034         .emit_fence = vce_v4_0_ring_emit_fence,
1035         .test_ring = amdgpu_vce_ring_test_ring,
1036         .test_ib = amdgpu_vce_ring_test_ib,
1037         .insert_nop = amdgpu_ring_insert_nop,
1038         .insert_end = vce_v4_0_ring_insert_end,
1039         .pad_ib = amdgpu_ring_generic_pad_ib,
1040         .begin_use = amdgpu_vce_ring_begin_use,
1041         .end_use = amdgpu_vce_ring_end_use,
1042 };
1043
1044 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1045 {
1046         int i;
1047
1048         for (i = 0; i < adev->vce.num_rings; i++)
1049                 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1050         DRM_INFO("VCE enabled in VM mode\n");
1051 }
1052
1053 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1054         .set = vce_v4_0_set_interrupt_state,
1055         .process = vce_v4_0_process_interrupt,
1056 };
1057
1058 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1059 {
1060         adev->vce.irq.num_types = 1;
1061         adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1062 }
1063
1064 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1065 {
1066         .type = AMD_IP_BLOCK_TYPE_VCE,
1067         .major = 4,
1068         .minor = 0,
1069         .rev = 0,
1070         .funcs = &vce_v4_0_ip_funcs,
1071 };