/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V4_0_FW_SIZE        (384 * 1024)
#define VCE_V4_0_STACK_SIZE     (64 * 1024)
#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
                                          uint32_t *init_table,
                                          uint32_t reg_offset,
                                          uint32_t value)
{
        direct_wt->cmd_header.reg_offset = reg_offset;
        direct_wt->reg_value = value;
        memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
}

static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
                                                 uint32_t *init_table,
                                                 uint32_t reg_offset,
                                                 uint32_t mask, uint32_t data)
{
        direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
        direct_rd_mod_wt->mask_value = mask;
        direct_rd_mod_wt->write_data = data;
        memcpy((void *)init_table, direct_rd_mod_wt,
               sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
}

static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
                                            uint32_t *init_table,
                                            uint32_t reg_offset,
                                            uint32_t mask, uint32_t wait)
{
        direct_poll->cmd_header.reg_offset = reg_offset;
        direct_poll->mask_value = mask;
        direct_poll->wait_value = wait;
        memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
}

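/* Each INSERT_* macro below appends one MMSCH direct command to the
 * init table and advances init_table and table_size by the command
 * size in dwords.
 */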
#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
        mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
                                      init_table, (reg), \
                                      (mask), (data)); \
        init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
        table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
}

#define INSERT_DIRECT_WT(reg, value) { \
        mmsch_insert_direct_wt(&direct_wt, \
                               init_table, (reg), \
                               (value)); \
        init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
        table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
}

#define INSERT_DIRECT_POLL(reg, mask, wait) { \
        mmsch_insert_direct_poll(&direct_poll, \
                                 init_table, (reg), \
                                 (mask), (wait)); \
        init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
        table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
}

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring == &adev->vce.ring[0])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                        lower_32_bits(ring->wptr));
        else if (ring == &adev->vce.ring[1])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                        lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                        lower_32_bits(ring->wptr));
}

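/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the VCPU reports the firmware as loaded,
 * toggling the ECPU soft reset between retries.
 * Returns 0 on success, -ETIMEDOUT if the firmware never comes up.
 */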
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

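/**
 * vce_v4_0_mmsch_start - kick off MMSCH initialization
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table built by vce_v4_0_sriov_start()
 *
 * Points the MM scheduler firmware at the descriptor table and polls
 * the mailbox response register until the init is acknowledged.
 */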
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }

        return 0;
}

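/**
 * vce_v4_0_sriov_start - start VCE block under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH init table that programs the ring and MC registers
 * on behalf of the VF, then hands it to the MM scheduler.
 */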
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr));
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

                /* begin of MC_RESUME */
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                size = VCE_V4_0_FW_SIZE;
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset += size;
                size = VCE_V4_0_STACK_SIZE;
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;

                return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
        }

        return -EINVAL; /* already initialized? */
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                        ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}

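/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU clock, holds the ECPU in reset and
 * clears the busy flag.
 */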
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        /* Set Clock-Gating off */
        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
                vce_v4_0_set_vce_sw_clock_gating(adev, false);
        */

        return 0;
}

static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;

                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        }

        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SRIOV */
                        ring->use_doorbell = true;
                        if (i == 0)
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
                        else if (i == 1)
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
                        else
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        if (amdgpu_sriov_vf(adev)) {
                r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
                                            AMDGPU_GEM_DOMAIN_VRAM,
                                            &adev->virt.mm_table.bo,
                                            &adev->virt.mm_table.gpu_addr,
                                            (void *)&adev->virt.mm_table.cpu_addr);
                if (!r) {
                        memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
                        dev_info(adev->dev, "mm table gpu addr = 0x%llx, cpu addr = %p\n",
                                 adev->virt.mm_table.gpu_addr,
                                 adev->virt.mm_table.cpu_addr);
                }
                return r;
        }

        return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        if (amdgpu_sriov_vf(adev))
                amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
                                      &adev->virt.mm_table.gpu_addr,
                                      (void *)&adev->virt.mm_table.cpu_addr);

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
                if (r)
                        return r;
                else
                        adev->vce.ring[i].ready = true;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        /* vce_v4_0_wait_for_idle(handle); */
        vce_v4_0_stop(adev);
        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        return 0;
}

static int vce_v4_0_suspend(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        return vce_v4_0_hw_init(adev);
}

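/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Let the VCE memory controller know where the firmware,
 * stack and data segments are located.
 */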
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
        }

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for 1st instance, 0x10 for 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * VCE team suggests using bit 3--bit 6 for busy status check
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
                (adev->asic_type == CHIP_TONGA) ||
                (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
                return 0;

        if (state == AMD_PG_STATE_GATE)
                /* XXX do we need a vce_v4_0_stop()? */
                return 0;
        else
                return vce_v4_0_start(adev);
}
#endif

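/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id to use
 * @ctx_switch: unused by this function
 *
 * Emits an IB_VM command asking the VCPU to execute the indirect buffer.
 */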
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
                struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vm_id);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

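/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: GPU address to write the fence sequence number to
 * @seq: sequence number to signal
 * @flags: fence flags (64 bit fences are not supported)
 *
 * Writes a fence followed by a trap command to the ring.
 */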
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

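/**
 * vce_v4_0_emit_vm_flush - flush the VM for this ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush
 * @pd_addr: page directory base address
 *
 * Updates the page table base in every VM hub, then requests a TLB
 * flush for the given VM id and waits for its acknowledgement.
 */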
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                         unsigned int vm_id, uint64_t pd_addr)
{
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
        unsigned eng = ring->idx;
        unsigned i;

        pd_addr = pd_addr | 0x1; /* valid bit */
        /* now only use physical base address of PDE and valid */
        BUG_ON(pd_addr & 0xFFFF00000000003EULL);

        for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
                struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];

                amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
                amdgpu_ring_write(ring,
                        (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
                amdgpu_ring_write(ring, upper_32_bits(pd_addr));

                amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
                amdgpu_ring_write(ring,
                        (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
                amdgpu_ring_write(ring, lower_32_bits(pd_addr));

                amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
                amdgpu_ring_write(ring,
                        (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
                amdgpu_ring_write(ring, 0xffffffff);
                amdgpu_ring_write(ring, lower_32_bits(pd_addr));

                /* flush TLB */
                amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
                amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
                amdgpu_ring_write(ring, req);

                /* wait for flush */
                amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
                amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
                amdgpu_ring_write(ring, 1 << vm_id);
                amdgpu_ring_write(ring, 1 << vm_id);
        }
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (state == AMDGPU_IRQ_STATE_ENABLE)
                val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
                        VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
                        ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};