/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "amdgpu_atomfirmware.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "soc15d.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"

#define GFX11_NUM_GFX_RINGS             1
#define GFX11_MEC_HPD_SIZE              2048

#define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1        0x1388

#define regCGTT_WD_CLK_CTRL             0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX    1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1   0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX  1

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
        SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
        SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
        SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
        SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

#define DEFAULT_SH_MEM_CONFIG \
        ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
         (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
         (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
                                   u32 sh_num, u32 instance);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
                                           uint16_t pasid, uint32_t flush_type,
                                           bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
                                      bool enable);

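/* Hand the KIQ its list of queues: a SET_RESOURCES packet carrying the
 * queue mask; the GWS, OAC and GDS fields are unused here and written as
 * zero.
 */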
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
                          PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

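/* Map a gfx, compute or MES ring onto the hardware through the KIQ:
 * MAP_QUEUES selects the ME/pipe/queue and engine, then supplies the
 * doorbell offset and the MQD and wptr GPU addresses.
 */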
static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = ring->wptr_gpu_addr;
        uint32_t me = 0, eng_sel = 0;

        switch (ring->funcs->type) {
        case AMDGPU_RING_TYPE_COMPUTE:
                me = 1;
                eng_sel = 0;
                break;
        case AMDGPU_RING_TYPE_GFX:
                me = 0;
                eng_sel = 4;
                break;
        case AMDGPU_RING_TYPE_MES:
                me = 2;
                eng_sel = 5;
                break;
        default:
                WARN_ON(1);
        }

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, num_Q: 1 */
                          PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                          PACKET3_MAP_QUEUES_ME(me) |
                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
        amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

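/* Unmap (or preempt) a queue through the KIQ. If MES is enabled but the
 * KIQ ring is not ready, the request is routed through the MES instead.
 * For PREEMPT_QUEUES_NO_UNMAP the trailing dwords carry the fence address
 * and sequence number used to signal completion.
 */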
static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
                amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
                return;
        }

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* action, Q_sel: 0, eng_sel, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

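/* Query a queue's status through the KIQ; completion is signalled via the
 * fence address/sequence pair passed in the packet.
 */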
static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        amdgpu_ring_write(kiq_ring, /* doorbell offset, eng_sel */
                          PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                          PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
        .kiq_set_resources = gfx11_kiq_set_resources,
        .kiq_map_queues = gfx11_kiq_map_queues,
        .kiq_unmap_queues = gfx11_kiq_unmap_queues,
        .kiq_query_status = gfx11_kiq_query_status,
        .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
        .set_resources_size = 8,
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
        .invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
        adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
}

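/* Program the per-ASIC "golden" register overrides; only GC 11.0.1 and
 * 11.0.4 currently need them.
 */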
static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(11, 0, 1):
        case IP_VERSION(11, 0, 4):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_11_0_1,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
                break;
        default:
                break;
        }
}

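/* Emit a WRITE_DATA packet writing @val to register @reg, optionally with
 * write confirmation (@wc).
 */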
static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                        bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                          WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

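/* Emit a WAIT_REG_MEM packet that polls a register (mem_space=0) or a
 * memory location (mem_space=1) until (value & mask) == ref.
 */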
static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                   int mem_space, int opt, uint32_t addr0,
                                   uint32_t addr1, uint32_t ref, uint32_t mask,
                                   uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                          /* memory (1) or register (0) */
                          (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                           WAIT_REG_MEM_OPERATION(opt) | /* wait */
                           WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                           WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

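/* Basic ring sanity test: push a write of 0xDEADBEEF to SCRATCH_REG0
 * through the ring and poll the register until the value shows up or the
 * timeout expires.
 */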
static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
        uint32_t tmp = 0;
        unsigned i;
        int r;

        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 5);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                return r;
        }

        if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
                gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
        } else {
                amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
                amdgpu_ring_write(ring, scratch -
                                  PACKET3_SET_UCONFIG_REG_START);
                amdgpu_ring_write(ring, 0xDEADBEEF);
        }
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                if (amdgpu_emu_mode == 1)
                        msleep(1);
                else
                        udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;
        return r;
}

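/* IB sanity test: submit a small indirect buffer that writes 0xDEADBEEF to
 * a writeback slot (or the MES context padding page) and wait for the
 * fence.
 */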
static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        unsigned index;
        uint64_t gpu_addr;
        volatile uint32_t *cpu_ptr;
        long r;

        /* MES KIQ fw doesn't support indirect buffers for now */
        if (adev->enable_mes_kiq &&
            ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
                return 0;

        memset(&ib, 0, sizeof(ib));

        if (ring->is_mes_queue) {
                uint32_t padding, offset;

                offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
                padding = amdgpu_mes_ctx_get_offs(ring,
                                                  AMDGPU_MES_CTX_PADDING_OFFS);

                ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
                ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

                gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
                cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
                *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
        } else {
                r = amdgpu_device_wb_get(adev, &index);
                if (r)
                        return r;

                gpu_addr = adev->wb.gpu_addr + (index * 4);
                adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
                cpu_ptr = &adev->wb.wb[index];

                r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
                if (r) {
                        DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                        goto err1;
                }
        }

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;
err2:
        if (!ring->is_mes_queue)
                amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        if (!ring->is_mes_queue)
                amdgpu_device_wb_free(adev, index);
        return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

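/* Fetch and validate the PFP, ME, RLC and MEC images for the detected GC
 * IP version. The PFP header version decides whether the RS64 CP layout
 * (separate instruction and per-pipe stack images) or the legacy layout
 * is used.
 */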
static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
        char fw_name[40];
        char ucode_prefix[30];
        int err;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        uint16_t version_major;
        uint16_t version_minor;

        DRM_DEBUG("\n");

        amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        /* check the pfp fw hdr version to decide whether to enable rs64 for gfx11 */
        adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
                                (union amdgpu_firmware_header *)
                                adev->gfx.pfp_fw->data, 2, 0);
        if (adev->gfx.rs64_enable) {
                dev_info(adev->dev, "CP RS64 enable\n");
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
        } else {
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        if (adev->gfx.rs64_enable) {
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
        } else {
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
        }

        if (!amdgpu_sriov_vf(adev)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
                err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
                if (err)
                        goto out;
                err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
                if (err)
                        goto out;
                rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
                version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
                version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
                err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
                if (err)
                        goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        if (adev->gfx.rs64_enable) {
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
        } else {
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
                amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
        }

        /* only one MEC for gfx 11.0.0. */
        adev->gfx.mec2_fw = NULL;

out:
        if (err) {
                dev_err(adev->dev,
                        "gfx11: Failed to init firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
        }

        return err;
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
{
        const struct psp_firmware_header_v1_0 *toc_hdr;
        int err = 0;
        char fw_name[40];
        char ucode_prefix[30];

        amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
        err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
        if (err)
                goto out;

        err = amdgpu_ucode_validate(adev->psp.toc_fw);
        if (err)
                goto out;

        toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
        adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
        adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
        adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
        adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
                                le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
        return 0;
out:
        dev_err(adev->dev, "Failed to load TOC microcode\n");
        release_firmware(adev->psp.toc_fw);
        adev->psp.toc_fw = NULL;
        return err;
}

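/* Size, in dwords, of the clear-state indirect buffer built from
 * gfx11_cs_data: preamble begin/end, context control, the SECT_CONTEXT
 * register extents, PA_SC_TILE_STEERING_OVERRIDE and the final
 * CLEAR_STATE packet.
 */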
static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
        u32 count = 0;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        /* begin clear state */
        count += 2;
        /* context control state */
        count += 3;

        for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT)
                                count += 2 + ext->reg_count;
                        else
                                return 0;
                }
        }

        /* set PA_SC_TILE_STEERING_OVERRIDE */
        count += 3;
        /* end clear state */
        count += 2;
        /* clear state packet */
        count += 2;

        return count;
}

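/* Fill the clear-state buffer with the packet stream sized by
 * gfx_v11_0_get_csb_size() above.
 */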
static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
                                     volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int ctx_reg_offset;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        ctx_reg_offset =
                SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        buffer[count++] = cpu_to_le32(ctx_reg_offset);
        buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
        /* clear state block */
        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
                        &adev->gfx.rlc.clear_state_gpu_addr,
                        (void **)&adev->gfx.rlc.cs_ptr);

        /* jump table block */
        amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
                        &adev->gfx.rlc.cp_table_gpu_addr,
                        (void **)&adev->gfx.rlc.cp_table_ptr);
}

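/* Record the scratch and GRBM register offsets used for RLCG indirect
 * register access (e.g. under SR-IOV).
 */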
static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
        struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

        reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
        reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
        reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
        reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
        reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
        reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
        reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
        reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
        adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
        const struct cs_section_def *cs_data;
        int r;

        adev->gfx.rlc.cs_data = gfx11_cs_data;

        cs_data = adev->gfx.rlc.cs_data;

        if (cs_data) {
                /* init clear state block */
                r = amdgpu_gfx_rlc_init_csb(adev);
                if (r)
                        return r;
        }

        /* init spm vmid with 0xf */
        if (adev->gfx.rlc.funcs->update_spm_vmid)
                adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

        return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static int gfx_v11_0_me_init(struct amdgpu_device *adev)
{
        int r;

        bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

        amdgpu_gfx_graphics_queue_acquire(adev);

        r = gfx_v11_0_init_microcode(adev);
        if (r)
                DRM_ERROR("Failed to load gfx firmware!\n");

        return r;
}

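/* Allocate the MEC HPD EOP buffer in GTT, one GFX11_MEC_HPD_SIZE slot per
 * acquired compute ring.
 */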
static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
        int r;
        u32 *hpd;
        size_t mec_hpd_size;

        bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

        /* take ownership of the relevant compute queues */
        amdgpu_gfx_compute_queue_acquire(adev);
        mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

        if (mec_hpd_size) {
                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
                                              AMDGPU_GEM_DOMAIN_GTT,
                                              &adev->gfx.mec.hpd_eop_obj,
                                              &adev->gfx.mec.hpd_eop_gpu_addr,
                                              (void **)&hpd);
                if (r) {
                        dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
                        gfx_v11_0_mec_fini(adev);
                        return r;
                }

                memset(hpd, 0, mec_hpd_size);

                amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
        }

        return 0;
}

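/* Read SQ per-wave state through the SQ_IND_INDEX/SQ_IND_DATA indirect
 * register interface; wave_read_regs() uses the auto-increment mode to
 * stream out consecutive registers.
 */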
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
        WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
                (address << SQ_IND_INDEX__INDEX__SHIFT));
        return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
                           uint32_t thread, uint32_t regno,
                           uint32_t num, uint32_t *out)
{
        WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
                (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
                (SQ_IND_INDEX__AUTO_INCR_MASK));
        while (num--)
                *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
        /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
         * field when performing a select_se_sh so it should be
         * zero here */
        WARN_ON(simd != 0);

        /* type 2 wave data */
        dst[(*no_fields)++] = 2;
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
                                      uint32_t wave, uint32_t start,
                                      uint32_t size, uint32_t *dst)
{
        WARN_ON(simd != 0);

        wave_read_regs(
                adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
                dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
                                      uint32_t wave, uint32_t thread,
                                      uint32_t start, uint32_t size,
                                      uint32_t *dst)
{
        wave_read_regs(
                adev, wave, thread,
                start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
                                       u32 me, u32 pipe, u32 q, u32 vm)
{
        soc21_grbm_select(adev, me, pipe, q, vm);
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v11_0_select_se_sh,
        .read_wave_data = &gfx_v11_0_read_wave_data,
        .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
        .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
        .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
        .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
};

static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(11, 0, 0):
        case IP_VERSION(11, 0, 2):
        case IP_VERSION(11, 0, 3):
                adev->gfx.config.max_hw_contexts = 8;
                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
                break;
        case IP_VERSION(11, 0, 1):
        case IP_VERSION(11, 0, 4):
                adev->gfx.config.max_hw_contexts = 8;
                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
                break;
        default:
                BUG();
                break;
        }

        return 0;
}

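/* Set up one gfx ring: doorbell index, name and EOP interrupt source,
 * then hand it to the common amdgpu ring init code.
 */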
static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
                                   int me, int pipe, int queue)
{
        int r;
        struct amdgpu_ring *ring;
        unsigned int irq_type;

        ring = &adev->gfx.gfx_ring[ring_id];

        ring->me = me;
        ring->pipe = pipe;
        ring->queue = queue;

        ring->ring_obj = NULL;
        ring->use_doorbell = true;

        if (!ring_id)
                ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
        else
                ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
        sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

        irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
        r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
                             AMDGPU_RING_PRIO_DEFAULT, NULL);
        if (r)
                return r;
        return 0;
}

static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
                                       int mec, int pipe, int queue)
{
        int r;
        unsigned irq_type;
        struct amdgpu_ring *ring;
        unsigned int hw_prio;

        ring = &adev->gfx.compute_ring[ring_id];

        /* mec0 is me1 */
        ring->me = mec + 1;
        ring->pipe = pipe;
        ring->queue = queue;

        ring->ring_obj = NULL;
        ring->use_doorbell = true;
        ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
        ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
                                + (ring_id * GFX11_MEC_HPD_SIZE);
        sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
                + ring->pipe;
        hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
                        AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
        /* type-2 packets are deprecated on MEC, use type-3 instead */
        r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
                             hw_prio, NULL);
        if (r)
                return r;

        return 0;
}

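/* Where each firmware image lives inside the RLC autoload bo; filled in
 * from the PSP TOC by gfx_v11_0_parse_rlc_toc().
 */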
static struct {
        SOC21_FIRMWARE_ID       id;
        unsigned int            offset;
        unsigned int            size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
        RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

        while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
                        (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
                rlc_autoload_info[ucode->id].id = ucode->id;
                rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
                rlc_autoload_info[ucode->id].size = ucode->size * 4;

                ucode++;
        }
}

static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
        uint32_t total_size = 0;
        SOC21_FIRMWARE_ID id;

        gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

        for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
                total_size += rlc_autoload_info[id].size;

        /* the TOC offsets may be padded for alignment, so the end of the
         * last entry can lie beyond the plain sum of the sizes */
        if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
                total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
                        rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;

        return total_size;
}

static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
        int r;
        uint32_t total_size;

        total_size = gfx_v11_0_calc_toc_total_size(adev);

        r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
                        AMDGPU_GEM_DOMAIN_VRAM,
                        &adev->gfx.rlc.rlc_autoload_bo,
                        &adev->gfx.rlc.rlc_autoload_gpu_addr,
                        (void **)&adev->gfx.rlc.rlc_autoload_ptr);

        if (r) {
                dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
                return r;
        }

        return 0;
}

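/* Copy one firmware image into its TOC-assigned slot in the autoload bo,
 * zero-padding up to the slot size, and flag it in the 64-bit autoload
 * mask (the RS64 PFP/ME images are deliberately not flagged).
 */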
static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
                                              SOC21_FIRMWARE_ID id,
                                              const void *fw_data,
                                              uint32_t fw_size,
                                              uint32_t *fw_autoload_mask)
{
        uint32_t toc_offset;
        uint32_t toc_fw_size;
        char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

        if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
                return;

        toc_offset = rlc_autoload_info[id].offset;
        toc_fw_size = rlc_autoload_info[id].size;

        if (fw_size == 0)
                fw_size = toc_fw_size;

        if (fw_size > toc_fw_size)
                fw_size = toc_fw_size;

        memcpy(ptr + toc_offset, fw_data, fw_size);

        if (fw_size < toc_fw_size)
                memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);

        if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
                *(uint64_t *)fw_autoload_mask |= 1ULL << id;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
                                                        uint32_t *fw_autoload_mask)
{
        void *data;
        uint32_t size;
        uint64_t *toc_ptr;

        *(uint64_t *)fw_autoload_mask |= 0x1;

        DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

        data = adev->psp.toc.start_addr;
        size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

        toc_ptr = (uint64_t *)data + size / 8 - 1;
        *toc_ptr = *(uint64_t *)fw_autoload_mask;

        gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
                                        data, size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
                                                        uint32_t *fw_autoload_mask)
{
        const __le32 *fw_data;
        uint32_t fw_size;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct gfx_firmware_header_v2_0 *cpv2_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
        uint16_t version_major, version_minor;

        if (adev->gfx.rs64_enable) {
                /* pfp ucode */
                cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
                        adev->gfx.pfp_fw->data;
                /* instruction */
                fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
                        le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
                fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
                                                fw_data, fw_size, fw_autoload_mask);
                /* data */
                fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
                        le32_to_cpu(cpv2_hdr->data_offset_bytes));
                fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
                                                fw_data, fw_size, fw_autoload_mask);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
                                                fw_data, fw_size, fw_autoload_mask);
                /* me ucode */
                cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
                        adev->gfx.me_fw->data;
                /* instruction */
                fw_data = (const __le32 *)(adev->gfx.me_fw->data +
                        le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
                fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
                                                fw_data, fw_size, fw_autoload_mask);
                /* data */
                fw_data = (const __le32 *)(adev->gfx.me_fw->data +
                        le32_to_cpu(cpv2_hdr->data_offset_bytes));
                fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
                                                fw_data, fw_size, fw_autoload_mask);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
                                                fw_data, fw_size, fw_autoload_mask);
                /* mec ucode */
                cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
                        adev->gfx.mec_fw->data;
                /* instruction */
                fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
                        le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
                fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
                                                fw_data, fw_size, fw_autoload_mask);
                /* data */
                fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
                        le32_to_cpu(cpv2_hdr->data_offset_bytes));
                fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
                                                fw_data, fw_size, fw_autoload_mask);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
                                                fw_data, fw_size, fw_autoload_mask);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
                                                fw_data, fw_size, fw_autoload_mask);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
                                                fw_data, fw_size, fw_autoload_mask);
        } else {
                /* pfp ucode */
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                        adev->gfx.pfp_fw->data;
                fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
                                le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
                fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
                                                fw_data, fw_size, fw_autoload_mask);

                /* me ucode */
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                        adev->gfx.me_fw->data;
                fw_data = (const __le32 *)(adev->gfx.me_fw->data +
                                le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
                fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
                                                fw_data, fw_size, fw_autoload_mask);

                /* mec ucode */
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                        adev->gfx.mec_fw->data;
                fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
                                le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
                fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
                        cp_hdr->jt_size * 4;
                gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
                                                fw_data, fw_size, fw_autoload_mask);
        }

        /* rlc ucode */
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
                adev->gfx.rlc_fw->data;
        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
                        le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
        gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
                                        fw_data, fw_size, fw_autoload_mask);

        version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
        version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
        if (version_major == 2) {
                if (version_minor >= 2) {
                        rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

                        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
                                        le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
                        fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
                        gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
                                        fw_data, fw_size, fw_autoload_mask);

                        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
                                        le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
                        fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
                        gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
                                        fw_data, fw_size, fw_autoload_mask);
                }
        }
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
                                                        uint32_t *fw_autoload_mask)
{
        const __le32 *fw_data;
        uint32_t fw_size;
        const struct sdma_firmware_header_v2_0 *sdma_hdr;

        sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
                adev->sdma.instance[0].fw->data;
        fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
                        le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);

        gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
                        SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);

        fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
                        le32_to_cpu(sdma_hdr->ctl_ucode_offset));
        fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);

        gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
                        SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
}

1206 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1207                                                         uint32_t *fw_autoload_mask)
1208 {
1209         const __le32 *fw_data;
1210         unsigned fw_size;
1211         const struct mes_firmware_header_v1_0 *mes_hdr;
1212         int pipe, ucode_id, data_id;
1213
1214         for (pipe = 0; pipe < 2; pipe++) {
1215                 if (pipe==0) {
1216                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1217                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1218                 } else {
1219                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1220                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1221                 }
1222
1223                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1224                         adev->mes.fw[pipe]->data;
1225
1226                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1227                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1228                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1229
1230                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1231                                 ucode_id, fw_data, fw_size, fw_autoload_mask);
1232
1233                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1234                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1235                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1236
1237                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1238                                 data_id, fw_data, fw_size, fw_autoload_mask);
1239         }
1240 }
1241
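/* Drive the RLC backdoor autoload sequence: stage the SDMA, GFX, MES and
 * TOC images into the shared autoload buffer, point the GFX_IMU RLC
 * bootloader registers at the staged RLC_G image, then load, set up and
 * start the IMU firmware, which is expected to boot the RLC from that
 * buffer.
 */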
1242 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1243 {
1244         uint32_t rlc_g_offset, rlc_g_size;
1245         uint64_t gpu_addr;
1246         uint32_t autoload_fw_id[2];
1247
1248         memset(autoload_fw_id, 0, sizeof(autoload_fw_id));
1249
1250         /* RLC autoload sequence 2: copy ucode */
1251         gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1252         gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1253         gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1254         gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1255
1256         rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1257         rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1258         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1259
1260         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1261         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1262
1263         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1264
1265         /* RLC autoload sequence 3: load IMU fw */
1266         if (adev->gfx.imu.funcs->load_microcode)
1267                 adev->gfx.imu.funcs->load_microcode(adev);
1268         /* RLC autoload sequence 4: init IMU fw */
1269         if (adev->gfx.imu.funcs->setup_imu)
1270                 adev->gfx.imu.funcs->setup_imu(adev);
1271         if (adev->gfx.imu.funcs->start_imu)
1272                 adev->gfx.imu.funcs->start_imu(adev);
1273
1274         /* RLC autoload sequence 5: disable gpa mode */
1275         gfx_v11_0_disable_gpa_mode(adev);
1276
1277         return 0;
1278 }
1279
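/* IP-block sw_init: derive the ME/MEC pipe and queue topology from the
 * GC IP version, register the EOP and privileged reg/instruction fault
 * interrupt sources, and allocate the firmware, ring, KIQ and MQD
 * objects. For backdoor autoload the CPU-visible staging buffer is
 * created here as well.
 */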
1280 static int gfx_v11_0_sw_init(void *handle)
1281 {
1282         int i, j, k, r, ring_id = 0;
1283         struct amdgpu_kiq *kiq;
1284         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1285
1286         adev->gfxhub.funcs->init(adev);
1287
1288         switch (adev->ip_versions[GC_HWIP][0]) {
1289         case IP_VERSION(11, 0, 0):
1290         case IP_VERSION(11, 0, 2):
1291         case IP_VERSION(11, 0, 3):
1292                 adev->gfx.me.num_me = 1;
1293                 adev->gfx.me.num_pipe_per_me = 1;
1294                 adev->gfx.me.num_queue_per_pipe = 1;
1295                 adev->gfx.mec.num_mec = 2;
1296                 adev->gfx.mec.num_pipe_per_mec = 4;
1297                 adev->gfx.mec.num_queue_per_pipe = 4;
1298                 break;
1299         case IP_VERSION(11, 0, 1):
1300         case IP_VERSION(11, 0, 4):
1301                 adev->gfx.me.num_me = 1;
1302                 adev->gfx.me.num_pipe_per_me = 1;
1303                 adev->gfx.me.num_queue_per_pipe = 1;
1304                 adev->gfx.mec.num_mec = 1;
1305                 adev->gfx.mec.num_pipe_per_mec = 4;
1306                 adev->gfx.mec.num_queue_per_pipe = 4;
1307                 break;
1308         default:
1309                 adev->gfx.me.num_me = 1;
1310                 adev->gfx.me.num_pipe_per_me = 1;
1311                 adev->gfx.me.num_queue_per_pipe = 1;
1312                 adev->gfx.mec.num_mec = 1;
1313                 adev->gfx.mec.num_pipe_per_mec = 4;
1314                 adev->gfx.mec.num_queue_per_pipe = 8;
1315                 break;
1316         }
1317
1318         /* EOP Event */
1319         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1320                               GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1321                               &adev->gfx.eop_irq);
1322         if (r)
1323                 return r;
1324
1325         /* Privileged reg */
1326         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1327                               GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1328                               &adev->gfx.priv_reg_irq);
1329         if (r)
1330                 return r;
1331
1332         /* Privileged inst */
1333         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1334                               GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1335                               &adev->gfx.priv_inst_irq);
1336         if (r)
1337                 return r;
1338
1339         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1340
1341         if (adev->gfx.imu.funcs) {
1342                 if (adev->gfx.imu.funcs->init_microcode) {
1343                         r = adev->gfx.imu.funcs->init_microcode(adev);
1344                         if (r)
1345                                 DRM_ERROR("Failed to load imu firmware!\n");
1346                 }
1347         }
1348
1349         r = gfx_v11_0_me_init(adev);
1350         if (r)
1351                 return r;
1352
1353         r = gfx_v11_0_rlc_init(adev);
1354         if (r) {
1355                 DRM_ERROR("Failed to init rlc BOs!\n");
1356                 return r;
1357         }
1358
1359         r = gfx_v11_0_mec_init(adev);
1360         if (r) {
1361                 DRM_ERROR("Failed to init MEC BOs!\n");
1362                 return r;
1363         }
1364
1365         /* set up the gfx ring */
1366         for (i = 0; i < adev->gfx.me.num_me; i++) {
1367                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1368                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1369                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1370                                         continue;
1371
1372                                 r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1373                                                             i, k, j);
1374                                 if (r)
1375                                         return r;
1376                                 ring_id++;
1377                         }
1378                 }
1379         }
1380
1381         ring_id = 0;
1382         /* set up the compute queues - allocate horizontally across pipes */
1383         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1384                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1385                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1386                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
1387                                                                      j))
1388                                         continue;
1389
1390                                 r = gfx_v11_0_compute_ring_init(adev, ring_id,
1391                                                                 i, k, j);
1392                                 if (r)
1393                                         return r;
1394
1395                                 ring_id++;
1396                         }
1397                 }
1398         }
1399
1400         if (!adev->enable_mes_kiq) {
1401                 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
1402                 if (r) {
1403                         DRM_ERROR("Failed to init KIQ BOs!\n");
1404                         return r;
1405                 }
1406
1407                 kiq = &adev->gfx.kiq;
1408                 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1409                 if (r)
1410                         return r;
1411         }
1412
1413         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
1414         if (r)
1415                 return r;
1416
1417         /* allocate visible FB for rlc auto-loading fw */
1418         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1419                 r = gfx_v11_0_init_toc_microcode(adev);
1420                 if (r)
1421                         dev_err(adev->dev, "Failed to load toc firmware!\n");
1422                 r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1423                 if (r)
1424                         return r;
1425         }
1426
1427         r = gfx_v11_0_gpu_early_init(adev);
1428         if (r)
1429                 return r;
1430
1431         return 0;
1432 }
1433
1434 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1435 {
1436         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1437                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1438                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1439
1440         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1441                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1442                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1443 }
1444
1445 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1446 {
1447         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1448                               &adev->gfx.me.me_fw_gpu_addr,
1449                               (void **)&adev->gfx.me.me_fw_ptr);
1450
1451         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1452                                &adev->gfx.me.me_fw_data_gpu_addr,
1453                                (void **)&adev->gfx.me.me_fw_data_ptr);
1454 }
1455
1456 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1457 {
1458         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1459                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
1460                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1461 }
1462
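/* IP-block sw_fini: tear down the rings, MQDs, KIQ, firmware objects and
 * the autoload buffer in roughly the reverse order of sw_init.
 */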
1463 static int gfx_v11_0_sw_fini(void *handle)
1464 {
1465         int i;
1466         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1467
1468         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1469                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1470         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1471                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1472
1473         amdgpu_gfx_mqd_sw_fini(adev);
1474
1475         if (!adev->enable_mes_kiq) {
1476                 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
1477                 amdgpu_gfx_kiq_fini(adev);
1478         }
1479
1480         gfx_v11_0_pfp_fini(adev);
1481         gfx_v11_0_me_fini(adev);
1482         gfx_v11_0_rlc_fini(adev);
1483         gfx_v11_0_mec_fini(adev);
1484
1485         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1486                 gfx_v11_0_rlc_autoload_buffer_fini(adev);
1487
1488         gfx_v11_0_free_microcode(adev);
1489
1490         return 0;
1491 }
1492
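/* Steer indexed register accesses through GRBM_GFX_INDEX to a given
 * shader engine / shader array / instance; 0xffffffff in a field selects
 * broadcast writes for it, e.g.
 *     gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 * restores full broadcast.
 */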
1493 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1494                                    u32 sh_num, u32 instance)
1495 {
1496         u32 data;
1497
1498         if (instance == 0xffffffff)
1499                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1500                                      INSTANCE_BROADCAST_WRITES, 1);
1501         else
1502                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1503                                      instance);
1504
1505         if (se_num == 0xffffffff)
1506                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1507                                      1);
1508         else
1509                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1510
1511         if (sh_num == 0xffffffff)
1512                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1513                                      1);
1514         else
1515                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1516
1517         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1518 }
1519
1520 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1521 {
1522         u32 data, mask;
1523
1524         data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1525         data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1526
1527         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1528         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1529
1530         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1531                                          adev->gfx.config.max_sh_per_se);
1532
1533         return (~data) & mask;
1534 }
1535
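/* Query each SE/SA for its active render-backend bitmap and cache the
 * combined enable mask and RB count in the gfx config.
 */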
1536 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1537 {
1538         int i, j;
1539         u32 data;
1540         u32 active_rbs = 0;
1541         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1542                                         adev->gfx.config.max_sh_per_se;
1543
1544         mutex_lock(&adev->grbm_idx_mutex);
1545         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1546                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1547                         gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
1548                         data = gfx_v11_0_get_rb_active_bitmap(adev);
1549                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1550                                                rb_bitmap_width_per_sh);
1551                 }
1552         }
1553         gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1554         mutex_unlock(&adev->grbm_idx_mutex);
1555
1556         adev->gfx.config.backend_enable_mask = active_rbs;
1557         adev->gfx.config.num_rbs = hweight32(active_rbs);
1558 }
1559
1560 #define DEFAULT_SH_MEM_BASES    (0x6000)
1561 #define LDS_APP_BASE           0x1
1562 #define SCRATCH_APP_BASE       0x2
1563
1564 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1565 {
1566         int i;
1567         uint32_t sh_mem_bases;
1568         uint32_t data;
1569
1570         /*
1571          * Configure apertures:
1572          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1573          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1574          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1575          */
1576         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1577                         SCRATCH_APP_BASE;
1578
1579         mutex_lock(&adev->srbm_mutex);
1580         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1581                 soc21_grbm_select(adev, 0, 0, 0, i);
1582                 /* CP and shaders */
1583                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1584                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1585
1586                 /* Enable trap for each kfd vmid. */
1587                 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1588                 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
                     WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1589         }
1590         soc21_grbm_select(adev, 0, 0, 0, 0);
1591         mutex_unlock(&adev->srbm_mutex);
1592
1593         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1594          * access. These should be enabled by FW for target VMIDs. */
1595         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1596                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1597                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1598                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1599                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1600         }
1601 }
1602
1603 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1604 {
1605         int vmid;
1606
1607         /*
1608          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1609          * access. Compute VMIDs should be enabled by FW for target VMIDs,
1610          * the driver can enable them for graphics. VMID0 should maintain
1611          * access so that HWS firmware can save/restore entries.
1612          */
1613         for (vmid = 1; vmid < 16; vmid++) {
1614                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1615                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1616                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1617                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1618         }
1619 }
1620
1621 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1622 {
1623         /* TODO: harvest feature to be added later. */
1624 }
1625
1626 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1627 {
1628         /* TCCs are global (not instanced). */
1629         uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1630                                RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1631
1632         adev->gfx.config.tcc_disabled_mask =
1633                 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1634                 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1635 }
1636
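/* One-time GRBM constant setup: RB and CU info, per-VMID SH_MEM
 * aperture bases (VMID 0 keeps the defaults) and the compute/GDS VMID
 * initialization.
 */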
1637 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1638 {
1639         u32 tmp;
1640         int i;
1641
1642         if (!amdgpu_sriov_vf(adev))
1643                 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1644
1645         gfx_v11_0_setup_rb(adev);
1646         gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1647         gfx_v11_0_get_tcc_info(adev);
1648         adev->gfx.config.pa_sc_tile_steering_override = 0;
1649
1650         /* XXX SH_MEM regs */
1651         /* where to put LDS, scratch, GPUVM in FSA64 space */
1652         mutex_lock(&adev->srbm_mutex);
1653         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
1654                 soc21_grbm_select(adev, 0, 0, 0, i);
1655                 /* CP and shaders */
1656                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1657                 if (i != 0) {
1658                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1659                                 (adev->gmc.private_aperture_start >> 48));
1660                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1661                                 (adev->gmc.shared_aperture_start >> 48));
1662                         WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1663                 }
1664         }
1665         soc21_grbm_select(adev, 0, 0, 0, 0);
1666
1667         mutex_unlock(&adev->srbm_mutex);
1668
1669         gfx_v11_0_init_compute_vmid(adev);
1670         gfx_v11_0_init_gds_vmid(adev);
1671 }
1672
1673 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1674                                                bool enable)
1675 {
1676         u32 tmp;
1677
1678         if (amdgpu_sriov_vf(adev))
1679                 return;
1680
1681         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1682
1683         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1684                             enable ? 1 : 0);
1685         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1686                             enable ? 1 : 0);
1687         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1688                             enable ? 1 : 0);
1689         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1690                             enable ? 1 : 0);
1691
1692         WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1693 }
1694
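/* Regenerate the clear-state buffer and point the RLC clear-state
 * indirect buffer (CSIB) registers at it.
 */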
1695 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1696 {
1697         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1698
1699         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1700                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1701         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1702                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1703         WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1704
1705         return 0;
1706 }
1707
1708 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1709 {
1710         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1711
1712         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1713         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1714 }
1715
1716 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1717 {
1718         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1719         udelay(50);
1720         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1721         udelay(50);
1722 }
1723
1724 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1725                                              bool enable)
1726 {
1727         uint32_t rlc_pg_cntl;
1728
1729         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1730
1731         if (!enable) {
1732                 /* RLC_PG_CNTL[23] = 0 (default)
1733                  * RLC will wait for handshake acks with SMU
1734                  * GFXOFF will be enabled
1735                  * RLC_PG_CNTL[23] = 1
1736                  * RLC will not issue any message to SMU
1737                  * hence no handshake between SMU & RLC
1738                  * GFXOFF will be disabled
1739                  */
1740                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1741         } else {
1742                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
             }
1743         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1744 }
1745
1746 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1747 {
1748         /* TODO: keep the RLC/SMU handshake disabled until SMU
1749          * and the GFXOFF feature work as expected */
1750         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1751                 gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1752
1753         WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1754         udelay(50);
1755 }
1756
1757 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1758 {
1759         uint32_t tmp;
1760
1761         /* enable Save Restore Machine */
1762         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1763         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1764         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1765         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1766 }
1767
1768 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1769 {
1770         const struct rlc_firmware_header_v2_0 *hdr;
1771         const __le32 *fw_data;
1772         unsigned i, fw_size;
1773
1774         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1775         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1776                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1777         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1778
1779         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1780                      RLCG_UCODE_LOADING_START_ADDRESS);
1781
1782         for (i = 0; i < fw_size; i++)
1783                 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1784                              le32_to_cpup(fw_data++));
1785
1786         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1787 }
1788
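/* Write the RLC LX6 IRAM and DRAM images word by word through the
 * indirect ADDR/DATA interface, then take the LX6 core out of reset with
 * PDEBUG enabled. The msleep() throttling appears to be needed only in
 * emulation mode.
 */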
1789 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1790 {
1791         const struct rlc_firmware_header_v2_2 *hdr;
1792         const __le32 *fw_data;
1793         unsigned i, fw_size;
1794         u32 tmp;
1795
1796         hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1797
1798         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1799                         le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1800         fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1801
1802         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1803
1804         for (i = 0; i < fw_size; i++) {
1805                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1806                         msleep(1);
1807                 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1808                                 le32_to_cpup(fw_data++));
1809         }
1810
1811         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1812
1813         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1814                         le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1815         fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1816
1817         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1818         for (i = 0; i < fw_size; i++) {
1819                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1820                         msleep(1);
1821                 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1822                                 le32_to_cpup(fw_data++));
1823         }
1824
1825         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1826
1827         tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1828         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1829         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1830         WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1831 }
1832
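/* Load the RLCP (pace) and RLCV (GPU IOV) images carried by RLC v2.3
 * firmware headers, then enable GPM thread 1 and the GPU IOV F32 core.
 */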
1833 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1834 {
1835         const struct rlc_firmware_header_v2_3 *hdr;
1836         const __le32 *fw_data;
1837         unsigned i, fw_size;
1838         u32 tmp;
1839
1840         hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1841
1842         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1843                         le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1844         fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1845
1846         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1847
1848         for (i = 0; i < fw_size; i++) {
1849                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1850                         msleep(1);
1851                 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1852                                 le32_to_cpup(fw_data++));
1853         }
1854
1855         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
1856
1857         tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1858         tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1859         WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
1860
1861         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1862                         le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
1863         fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
1864
1865         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
1866
1867         for (i = 0; i < fw_size; i++) {
1868                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1869                         msleep(1);
1870                 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
1871                                 le32_to_cpup(fw_data++));
1872         }
1873
1874         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
1875
1876         tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
1877         tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
1878         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
1879 }
1880
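/* Legacy (direct) RLC microcode load: always program the RLCG image and,
 * when DPM is enabled, the extra images a v2.2/v2.3 header carries.
 */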
1881 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
1882 {
1883         const struct rlc_firmware_header_v2_0 *hdr;
1884         uint16_t version_major;
1885         uint16_t version_minor;
1886
1887         if (!adev->gfx.rlc_fw)
1888                 return -EINVAL;
1889
1890         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1891         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1892
1893         version_major = le16_to_cpu(hdr->header.header_version_major);
1894         version_minor = le16_to_cpu(hdr->header.header_version_minor);
1895
1896         if (version_major == 2) {
1897                 gfx_v11_0_load_rlcg_microcode(adev);
1898                 if (amdgpu_dpm == 1) {
1899                         if (version_minor >= 2)
1900                                 gfx_v11_0_load_rlc_iram_dram_microcode(adev);
1901                         if (version_minor == 3)
1902                                 gfx_v11_0_load_rlcp_rlcv_microcode(adev);
1903                 }
1904
1905                 return 0;
1906         }
1907
1908         return -EINVAL;
1909 }
1910
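/* Bring up the RLC. With PSP front-door loading only the CSB and SRM
 * need touching; otherwise stop the RLC, disable CG/PG, optionally do a
 * legacy direct microcode load, and start it again.
 */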
1911 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
1912 {
1913         int r;
1914
1915         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1916                 gfx_v11_0_init_csb(adev);
1917
1918                 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1919                         gfx_v11_0_rlc_enable_srm(adev);
1920         } else {
1921                 if (amdgpu_sriov_vf(adev)) {
1922                         gfx_v11_0_init_csb(adev);
1923                         return 0;
1924                 }
1925
1926                 adev->gfx.rlc.funcs->stop(adev);
1927
1928                 /* disable CG */
1929                 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
1930
1931                 /* disable PG */
1932                 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
1933
1934                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1935                         /* legacy rlc firmware loading */
1936                         r = gfx_v11_0_rlc_load_microcode(adev);
1937                         if (r)
1938                                 return r;
1939                 }
1940
1941                 gfx_v11_0_init_csb(adev);
1942
1943                 adev->gfx.rlc.funcs->start(adev);
1944         }
1945         return 0;
1946 }
1947
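/* Invalidate the ME L1 instruction cache and point its base at the
 * staged F32 ME microcode (the base is 4K aligned, hence the low-bit
 * mask). The PFP and MEC variants below follow the same pattern.
 */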
1948 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
1949 {
1950         uint32_t usec_timeout = 50000;  /* wait for 50ms */
1951         uint32_t tmp;
1952         int i;
1953
1954         /* Trigger an invalidation of the L1 instruction caches */
1955         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1956         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1957         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
1958
1959         /* Wait for invalidation complete */
1960         for (i = 0; i < usec_timeout; i++) {
1961                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1962                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
1963                                         INVALIDATE_CACHE_COMPLETE))
1964                         break;
1965                 udelay(1);
1966         }
1967
1968         if (i >= usec_timeout) {
1969                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
1970                 return -EINVAL;
1971         }
1972
1973         if (amdgpu_emu_mode == 1)
1974                 adev->hdp.funcs->flush_hdp(adev, NULL);
1975
1976         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
1977         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
1978         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
1979         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
1980         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
1981         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
1982
1983         /* Program me ucode address into instruction cache address register */
1984         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
1985                         lower_32_bits(addr) & 0xFFFFF000);
1986         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
1987                         upper_32_bits(addr));
1988
1989         return 0;
1990 }
1991
1992 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
1993 {
1994         uint32_t usec_timeout = 50000;  /* wait for 50ms */
1995         uint32_t tmp;
1996         int i;
1997
1998         /* Trigger an invalidation of the L1 instruction caches */
1999         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2000         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2001         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2002
2003         /* Wait for invalidation complete */
2004         for (i = 0; i < usec_timeout; i++) {
2005                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2006                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2007                                         INVALIDATE_CACHE_COMPLETE))
2008                         break;
2009                 udelay(1);
2010         }
2011
2012         if (i >= usec_timeout) {
2013                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2014                 return -EINVAL;
2015         }
2016
2017         if (amdgpu_emu_mode == 1)
2018                 adev->hdp.funcs->flush_hdp(adev, NULL);
2019
2020         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2021         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2022         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2023         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2024         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2025         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2026
2027         /* Program pfp ucode address into instruction cache address register */
2028         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2029                         lower_32_bits(addr) & 0xFFFFF000);
2030         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2031                         upper_32_bits(addr));
2032
2033         return 0;
2034 }
2035
2036 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2037 {
2038         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2039         uint32_t tmp;
2040         int i;
2041
2042         /* Trigger an invalidation of the L1 instruction caches */
2043         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2044         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2045
2046         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2047
2048         /* Wait for invalidation complete */
2049         for (i = 0; i < usec_timeout; i++) {
2050                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2051                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2052                                         INVALIDATE_CACHE_COMPLETE))
2053                         break;
2054                 udelay(1);
2055         }
2056
2057         if (i >= usec_timeout) {
2058                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2059                 return -EINVAL;
2060         }
2061
2062         if (amdgpu_emu_mode == 1)
2063                 adev->hdp.funcs->flush_hdp(adev, NULL);
2064
2065         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2066         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2067         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2068         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2069         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2070
2071         /* Program mec1 ucode address into instruction cache address register */
2072         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2073                         lower_32_bits(addr) & 0xFFFFF000);
2074         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2075                         upper_32_bits(addr));
2076
2077         return 0;
2078 }
2079
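/* RS64 variant of the PFP cache setup: program the instruction-cache
 * base (which implicitly invalidates the I$), prime the cache, set each
 * pipe's program-counter start from the v2.0 header, pulse the pipe
 * reset, and hook up the data-cache base for the stack image at addr2.
 */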
2080 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2081 {
2082         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2083         uint32_t tmp;
2084         unsigned i, pipe_id;
2085         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2086
2087         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2088                 adev->gfx.pfp_fw->data;
2089
2090         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2091                 lower_32_bits(addr));
2092         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2093                 upper_32_bits(addr));
2094
2095         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2096         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2097         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2098         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2099         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2100
2101         /*
2102          * Programming any of the CP_PFP_IC_BASE registers
2103          * forces an invalidation of the PFP L1 I$. Wait for the
2104          * invalidation to complete.
2105          */
2106         for (i = 0; i < usec_timeout; i++) {
2107                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2108                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2109                         INVALIDATE_CACHE_COMPLETE))
2110                         break;
2111                 udelay(1);
2112         }
2113
2114         if (i >= usec_timeout) {
2115                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2116                 return -EINVAL;
2117         }
2118
2119         /* Prime the L1 instruction caches */
2120         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2121         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2122         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2123         /* Wait for the cache to be primed */
2124         for (i = 0; i < usec_timeout; i++) {
2125                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2126                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2127                         ICACHE_PRIMED))
2128                         break;
2129                 udelay(1);
2130         }
2131
2132         if (i >= usec_timeout) {
2133                 dev_err(adev->dev, "failed to prime instruction cache\n");
2134                 return -EINVAL;
2135         }
2136
2137         mutex_lock(&adev->srbm_mutex);
2138         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2139                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2140                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2141                         (pfp_hdr->ucode_start_addr_hi << 30) |
2142                         (pfp_hdr->ucode_start_addr_lo >> 2));
2143                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2144                         pfp_hdr->ucode_start_addr_hi >> 2);
2145
2146                 /*
2147                  * Program CP_ME_CNTL to reset the given PIPE so that
2148                  * CP_PFP_PRGRM_CNTR_START takes effect.
2149                  */
2150                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2151                 if (pipe_id == 0)
2152                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2153                                         PFP_PIPE0_RESET, 1);
2154                 else
2155                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2156                                         PFP_PIPE1_RESET, 1);
2157                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2158
2159                 /* Clear the pfp pipe reset bit. */
2160                 if (pipe_id == 0)
2161                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2162                                         PFP_PIPE0_RESET, 0);
2163                 else
2164                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2165                                         PFP_PIPE1_RESET, 0);
2166                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2167
2168                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2169                         lower_32_bits(addr2));
2170                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2171                         upper_32_bits(addr2));
2172         }
2173         soc21_grbm_select(adev, 0, 0, 0, 0);
2174         mutex_unlock(&adev->srbm_mutex);
2175
2176         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2177         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2178         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2179         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2180
2181         /* Invalidate the data caches */
2182         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2183         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2184         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2185
2186         for (i = 0; i < usec_timeout; i++) {
2187                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2188                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2189                         INVALIDATE_DCACHE_COMPLETE))
2190                         break;
2191                 udelay(1);
2192         }
2193
2194         if (i >= usec_timeout) {
2195                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2196                 return -EINVAL;
2197         }
2198
2199         return 0;
2200 }
2201
2202 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2203 {
2204         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2205         uint32_t tmp;
2206         unsigned i, pipe_id;
2207         const struct gfx_firmware_header_v2_0 *me_hdr;
2208
2209         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2210                 adev->gfx.me_fw->data;
2211
2212         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2213                 lower_32_bits(addr));
2214         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2215                 upper_32_bits(addr));
2216
2217         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2218         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2219         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2220         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2221         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2222
2223         /*
2224          * Programming any of the CP_ME_IC_BASE registers
2225          * forces an invalidation of the ME L1 I$. Wait for the
2226          * invalidation to complete.
2227          */
2228         for (i = 0; i < usec_timeout; i++) {
2229                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2230                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2231                         INVALIDATE_CACHE_COMPLETE))
2232                         break;
2233                 udelay(1);
2234         }
2235
2236         if (i >= usec_timeout) {
2237                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2238                 return -EINVAL;
2239         }
2240
2241         /* Prime the instruction caches */
2242         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2243         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2244         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2245
2246         /* Wait for the instruction cache to be primed */
2247         for (i = 0; i < usec_timeout; i++) {
2248                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2249                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2250                         ICACHE_PRIMED))
2251                         break;
2252                 udelay(1);
2253         }
2254
2255         if (i >= usec_timeout) {
2256                 dev_err(adev->dev, "failed to prime instruction cache\n");
2257                 return -EINVAL;
2258         }
2259
2260         mutex_lock(&adev->srbm_mutex);
2261         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2262                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2263                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2264                         (me_hdr->ucode_start_addr_hi << 30) |
2265                         (me_hdr->ucode_start_addr_lo >> 2));
2266                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2267                         me_hdr->ucode_start_addr_hi >> 2);
2268
2269                 /*
2270                  * Program CP_ME_CNTL to reset the given PIPE so that
2271                  * CP_ME_PRGRM_CNTR_START takes effect.
2272                  */
2273                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2274                 if (pipe_id == 0)
2275                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2276                                         ME_PIPE0_RESET, 1);
2277                 else
2278                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2279                                         ME_PIPE1_RESET, 1);
2280                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2281
2282                 /* Clear the me pipe reset bit. */
2283                 if (pipe_id == 0)
2284                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2285                                         ME_PIPE0_RESET, 0);
2286                 else
2287                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2288                                         ME_PIPE1_RESET, 0);
2289                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2290
2291                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2292                         lower_32_bits(addr2));
2293                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2294                         upper_32_bits(addr2));
2295         }
2296         soc21_grbm_select(adev, 0, 0, 0, 0);
2297         mutex_unlock(&adev->srbm_mutex);
2298
2299         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2300         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2301         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2302         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2303
2304         /* Invalidate the data caches */
2305         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2306         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2307         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2308
2309         for (i = 0; i < usec_timeout; i++) {
2310                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2311                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2312                         INVALIDATE_DCACHE_COMPLETE))
2313                         break;
2314                 udelay(1);
2315         }
2316
2317         if (i >= usec_timeout) {
2318                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2319                 return -EINVAL;
2320         }
2321
2322         return 0;
2323 }
2324
2325 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2326 {
2327         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2328         uint32_t tmp;
2329         unsigned i;
2330         const struct gfx_firmware_header_v2_0 *mec_hdr;
2331
2332         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2333                 adev->gfx.mec_fw->data;
2334
2335         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2336         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2337         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2338         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2339         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2340
2341         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2342         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2343         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2344         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2345
2346         mutex_lock(&adev->srbm_mutex);
2347         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2348                 soc21_grbm_select(adev, 1, i, 0, 0);
2349
2350                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, lower_32_bits(addr2));
2351                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2352                      upper_32_bits(addr2));
2353
2354                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2355                                         mec_hdr->ucode_start_addr_lo >> 2 |
2356                                         mec_hdr->ucode_start_addr_hi << 30);
2357                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2358                                         mec_hdr->ucode_start_addr_hi >> 2);
2359
2360                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, lower_32_bits(addr));
2361                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2362                      upper_32_bits(addr));
2363         }
2364         soc21_grbm_select(adev, 0, 0, 0, 0);
2365         mutex_unlock(&adev->srbm_mutex);
2366
2367         /* Trigger an invalidation of the MEC data caches */
2368         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2369         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2370         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2371
2372         /* Wait for invalidation complete */
2373         for (i = 0; i < usec_timeout; i++) {
2374                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2375                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2376                                        INVALIDATE_DCACHE_COMPLETE))
2377                         break;
2378                 udelay(1);
2379         }
2380
2381         if (i >= usec_timeout) {
2382                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2383                 return -EINVAL;
2384         }
2385
2386         /* Trigger an invalidation of the L1 instruction caches */
2387         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2388         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2389         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2390
2391         /* Wait for invalidation complete */
2392         for (i = 0; i < usec_timeout; i++) {
2393                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2394                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2395                                        INVALIDATE_CACHE_COMPLETE))
2396                         break;
2397                 udelay(1);
2398         }
2399
2400         if (i >= usec_timeout) {
2401                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2402                 return -EINVAL;
2403         }
2404
2405         return 0;
2406 }
2407
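/* Program the RS64 PFP/ME/MEC program-counter start addresses from the
 * v2.0 firmware headers and pulse each pipe's reset so the new start
 * addresses take effect.
 */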
2408 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2409 {
2410         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2411         const struct gfx_firmware_header_v2_0 *me_hdr;
2412         const struct gfx_firmware_header_v2_0 *mec_hdr;
2413         uint32_t pipe_id, tmp;
2414
2415         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2416                 adev->gfx.mec_fw->data;
2417         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2418                 adev->gfx.me_fw->data;
2419         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2420                 adev->gfx.pfp_fw->data;
2421
2422         /* config pfp program start addr */
2423         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2424                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2425                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2426                         (pfp_hdr->ucode_start_addr_hi << 30) |
2427                         (pfp_hdr->ucode_start_addr_lo >> 2));
2428                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2429                         pfp_hdr->ucode_start_addr_hi >> 2);
2430         }
2431         soc21_grbm_select(adev, 0, 0, 0, 0);
2432
2433         /* reset pfp pipe */
2434         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2435         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2436         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2437         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2438
2439         /* clear pfp pipe reset */
2440         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2441         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2442         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2443
2444         /* config me program start addr */
2445         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2446                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2447                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2448                         (me_hdr->ucode_start_addr_hi << 30) |
2449                         (me_hdr->ucode_start_addr_lo >> 2));
2450                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2451                         me_hdr->ucode_start_addr_hi >> 2);
2452         }
2453         soc21_grbm_select(adev, 0, 0, 0, 0);
2454
2455         /* reset me pipe */
2456         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2457         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2458         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2459         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2460
2461         /* clear me pipe reset */
2462         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2463         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2464         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2465
2466         /* config mec program start addr */
2467         for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2468                 soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2469                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2470                                         mec_hdr->ucode_start_addr_lo >> 2 |
2471                                         mec_hdr->ucode_start_addr_hi << 30);
2472                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2473                                         mec_hdr->ucode_start_addr_hi >> 2);
2474         }
2475         soc21_grbm_select(adev, 0, 0, 0, 0);
2476
2477         /* reset mec pipe */
2478         tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2479         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2480         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2481         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2482         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2483         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2484
2485         /* clear mec pipe reset */
2486         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2487         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2488         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2489         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2490         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2491 }
2492
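/* Poll CP_STAT and the RLC bootload status until the RLC reports
 * BOOTLOAD_COMPLETE, then, for backdoor autoload, point the CP caches at
 * the staged microcode (RS64 or F32 layout, as appropriate).
 */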
2493 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2494 {
2495         uint32_t cp_status;
2496         uint32_t bootload_status;
2497         int i, r;
2498         uint64_t addr, addr2;
2499
2500         for (i = 0; i < adev->usec_timeout; i++) {
2501                 cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2502
2503                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
2504                                 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
2505                         bootload_status = RREG32_SOC15(GC, 0,
2506                                         regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2507                 else
2508                         bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2509
2510                 if ((cp_status == 0) &&
2511                     (REG_GET_FIELD(bootload_status,
2512                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2513                         break;
2514                 }
2515                 udelay(1);
2516         }
2517
2518         if (i >= adev->usec_timeout) {
2519                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2520                 return -ETIMEDOUT;
2521         }
2522
2523         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2524                 if (adev->gfx.rs64_enable) {
2525                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2526                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2527                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2528                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2529                         r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2530                         if (r)
2531                                 return r;
2532                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2533                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2534                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2535                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2536                         r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2537                         if (r)
2538                                 return r;
2539                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2540                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2541                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2542                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2543                         r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2544                         if (r)
2545                                 return r;
2546                 } else {
2547                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2548                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2549                         r = gfx_v11_0_config_me_cache(adev, addr);
2550                         if (r)
2551                                 return r;
2552                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2553                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2554                         r = gfx_v11_0_config_pfp_cache(adev, addr);
2555                         if (r)
2556                                 return r;
2557                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2558                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2559                         r = gfx_v11_0_config_mec_cache(adev, addr);
2560                         if (r)
2561                                 return r;
2562                 }
2563         }
2564
2565         return 0;
2566 }
2567
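/*
 * Halt or unhalt the gfx CP (PFP and ME) via CP_ME_CNTL and wait for
 * CP_STAT to drain to 0.  A timeout is logged but not treated as fatal.
 */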
2568 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2569 {
2570         int i;
2571         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2572
2573         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2574         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2575         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2576
2577         for (i = 0; i < adev->usec_timeout; i++) {
2578                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2579                         break;
2580                 udelay(1);
2581         }
2582
2583         if (i >= adev->usec_timeout)
2584                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2585
2586         return 0;
2587 }
2588
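/*
 * Legacy (F32) PFP load: stage the microcode in a GTT buffer, point the
 * PFP cache at it, then stream the jump table through the
 * CP_HYP_PFP_UCODE_ADDR/DATA pair.
 */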
2589 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2590 {
2591         int r;
2592         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2593         const __le32 *fw_data;
2594         unsigned i, fw_size;
2595
2596         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2597                 adev->gfx.pfp_fw->data;
2598
2599         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2600
2601         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2602                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2603         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2604
2605         r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2606                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2607                                       &adev->gfx.pfp.pfp_fw_obj,
2608                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2609                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2610         if (r) {
2611                 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2612                 gfx_v11_0_pfp_fini(adev);
2613                 return r;
2614         }
2615
2616         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2617
2618         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2619         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2620
2621         gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2622
2623         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2624
2625         for (i = 0; i < pfp_hdr->jt_size; i++)
2626                 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2627                              le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2628
2629         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2630
2631         return 0;
2632 }
2633
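/*
 * RS64 PFP load: the instruction and data segments live in separate
 * 64KB-aligned VRAM buffers.  After the I$ base is programmed, the
 * cache is invalidated and primed, and each PFP pipe is pulsed through
 * reset so it restarts from CP_PFP_PRGRM_CNTR_START.
 */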
2634 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2635 {
2636         int r;
2637         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2638         const __le32 *fw_ucode, *fw_data;
2639         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2640         uint32_t tmp;
2641         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2642
2643         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2644                 adev->gfx.pfp_fw->data;
2645
2646         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2647
2648         /* instruction */
2649         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2650                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2651         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2652         /* data */
2653         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2654                 le32_to_cpu(pfp_hdr->data_offset_bytes));
2655         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2656
2657         /* 64KB alignment */
2658         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2659                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2660                                       &adev->gfx.pfp.pfp_fw_obj,
2661                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2662                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2663         if (r) {
2664                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2665                 gfx_v11_0_pfp_fini(adev);
2666                 return r;
2667         }
2668
2669         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2670                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2671                                       &adev->gfx.pfp.pfp_fw_data_obj,
2672                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2673                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2674         if (r) {
2675                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2676                 gfx_v11_0_pfp_fini(adev);
2677                 return r;
2678         }
2679
2680         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2681         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2682
2683         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2684         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2685         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2686         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2687
2688         if (amdgpu_emu_mode == 1)
2689                 adev->hdp.funcs->flush_hdp(adev, NULL);
2690
2691         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2692                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2693         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2694                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2695
2696         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2697         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2698         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2699         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2700         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2701
2702         /*
2703          * Programming any of the CP_PFP_IC_BASE registers
2704          * forces invalidation of the PFP L1 I$. Wait for the
2705          * invalidation to complete.
2706          */
2707         for (i = 0; i < usec_timeout; i++) {
2708                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2709                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2710                         INVALIDATE_CACHE_COMPLETE))
2711                         break;
2712                 udelay(1);
2713         }
2714
2715         if (i >= usec_timeout) {
2716                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2717                 return -EINVAL;
2718         }
2719
2720         /* Prime the L1 instruction caches */
2721         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2722         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2723         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2724         /* Wait for the instruction cache to be primed */
2725         for (i = 0; i < usec_timeout; i++) {
2726                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2727                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2728                         ICACHE_PRIMED))
2729                         break;
2730                 udelay(1);
2731         }
2732
2733         if (i >= usec_timeout) {
2734                 dev_err(adev->dev, "failed to prime instruction cache\n");
2735                 return -EINVAL;
2736         }
2737
2738         mutex_lock(&adev->srbm_mutex);
2739         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2740                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2741                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2742                         (pfp_hdr->ucode_start_addr_hi << 30) |
2743                         (pfp_hdr->ucode_start_addr_lo >> 2));
2744                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2745                         pfp_hdr->ucode_start_addr_hi >> 2);
2746
2747                 /*
2748                  * Program CP_ME_CNTL to reset the given pipe so that
2749                  * CP_PFP_PRGRM_CNTR_START takes effect.
2750                  */
2751                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2752                 if (pipe_id == 0)
2753                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2754                                         PFP_PIPE0_RESET, 1);
2755                 else
2756                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2757                                         PFP_PIPE1_RESET, 1);
2758                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2759
2760                 /* Clear the pfp pipe reset bit. */
2761                 if (pipe_id == 0)
2762                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2763                                         PFP_PIPE0_RESET, 0);
2764                 else
2765                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2766                                         PFP_PIPE1_RESET, 0);
2767                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2768
2769                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2770                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2771                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2772                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2773         }
2774         soc21_grbm_select(adev, 0, 0, 0, 0);
2775         mutex_unlock(&adev->srbm_mutex);
2776
2777         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2778         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2779         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2780         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2781
2782         /* Invalidate the data caches */
2783         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2784         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2785         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2786
2787         for (i = 0; i < usec_timeout; i++) {
2788                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2789                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2790                         INVALIDATE_DCACHE_COMPLETE))
2791                         break;
2792                 udelay(1);
2793         }
2794
2795         if (i >= usec_timeout) {
2796                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2797                 return -EINVAL;
2798         }
2799
2800         return 0;
2801 }
2802
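/*
 * Legacy (F32) ME load; mirrors the PFP path above but targets the
 * CP_HYP_ME_UCODE_ADDR/DATA registers.
 */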
2803 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2804 {
2805         int r;
2806         const struct gfx_firmware_header_v1_0 *me_hdr;
2807         const __le32 *fw_data;
2808         unsigned i, fw_size;
2809
2810         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2811                 adev->gfx.me_fw->data;
2812
2813         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2814
2815         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2816                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2817         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2818
2819         r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2820                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2821                                       &adev->gfx.me.me_fw_obj,
2822                                       &adev->gfx.me.me_fw_gpu_addr,
2823                                       (void **)&adev->gfx.me.me_fw_ptr);
2824         if (r) {
2825                 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2826                 gfx_v11_0_me_fini(adev);
2827                 return r;
2828         }
2829
2830         memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2831
2832         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2833         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2834
2835         gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2836
2837         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
2838
2839         for (i = 0; i < me_hdr->jt_size; i++)
2840                 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2841                              le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2842
2843         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2844
2845         return 0;
2846 }
2847
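/*
 * RS64 ME load; same sequence as the RS64 PFP path, using the
 * CP_ME_IC_* registers and DC_BASE1 for the data segment.
 */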
2848 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2849 {
2850         int r;
2851         const struct gfx_firmware_header_v2_0 *me_hdr;
2852         const __le32 *fw_ucode, *fw_data;
2853         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2854         uint32_t tmp;
2855         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2856
2857         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2858                 adev->gfx.me_fw->data;
2859
2860         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2861
2862         /* instruction */
2863         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2864                 le32_to_cpu(me_hdr->ucode_offset_bytes));
2865         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2866         /* data */
2867         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2868                 le32_to_cpu(me_hdr->data_offset_bytes));
2869         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2870
2871         /* 64KB alignment */
2872         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2873                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2874                                       &adev->gfx.me.me_fw_obj,
2875                                       &adev->gfx.me.me_fw_gpu_addr,
2876                                       (void **)&adev->gfx.me.me_fw_ptr);
2877         if (r) {
2878                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2879                 gfx_v11_0_me_fini(adev);
2880                 return r;
2881         }
2882
2883         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2884                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2885                                       &adev->gfx.me.me_fw_data_obj,
2886                                       &adev->gfx.me.me_fw_data_gpu_addr,
2887                                       (void **)&adev->gfx.me.me_fw_data_ptr);
2888         if (r) {
2889                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2890                 gfx_v11_0_me_fini(adev);
2891                 return r;
2892         }
2893
2894         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2895         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2896
2897         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2898         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2899         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2900         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2901
2902         if (amdgpu_emu_mode == 1)
2903                 adev->hdp.funcs->flush_hdp(adev, NULL);
2904
2905         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2906                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2907         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2908                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2909
2910         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2911         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2912         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2913         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2914         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2915
2916         /*
2917          * Programming any of the CP_ME_IC_BASE registers
2918          * forces invalidation of the ME L1 I$. Wait for the
2919          * invalidation to complete.
2920          */
2921         for (i = 0; i < usec_timeout; i++) {
2922                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2923                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2924                         INVALIDATE_CACHE_COMPLETE))
2925                         break;
2926                 udelay(1);
2927         }
2928
2929         if (i >= usec_timeout) {
2930                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2931                 return -EINVAL;
2932         }
2933
2934         /* Prime the instruction caches */
2935         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2936         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2937         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2938
2939         /* Wait for the instruction cache to be primed */
2940         for (i = 0; i < usec_timeout; i++) {
2941                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2942                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2943                         ICACHE_PRIMED))
2944                         break;
2945                 udelay(1);
2946         }
2947
2948         if (i >= usec_timeout) {
2949                 dev_err(adev->dev, "failed to prime instruction cache\n");
2950                 return -EINVAL;
2951         }
2952
2953         mutex_lock(&adev->srbm_mutex);
2954         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2955                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2956                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2957                         (me_hdr->ucode_start_addr_hi << 30) |
2958                         (me_hdr->ucode_start_addr_lo >> 2));
2959                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2960                         me_hdr->ucode_start_addr_hi >> 2);
2961
2962                 /*
2963                  * Program CP_ME_CNTL to reset the given pipe so that
2964                  * CP_ME_PRGRM_CNTR_START takes effect.
2965                  */
2966                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2967                 if (pipe_id == 0)
2968                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2969                                         ME_PIPE0_RESET, 1);
2970                 else
2971                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2972                                         ME_PIPE1_RESET, 1);
2973                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2974
2975                 /* Clear the me pipe reset bit. */
2976                 if (pipe_id == 0)
2977                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2978                                         ME_PIPE0_RESET, 0);
2979                 else
2980                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2981                                         ME_PIPE1_RESET, 0);
2982                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2983
2984                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2985                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2986                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2987                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2988         }
2989         soc21_grbm_select(adev, 0, 0, 0, 0);
2990         mutex_unlock(&adev->srbm_mutex);
2991
2992         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2993         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2994         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2995         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2996
2997         /* Invalidate the data caches */
2998         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2999         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3000         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3001
3002         for (i = 0; i < usec_timeout; i++) {
3003                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3004                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3005                         INVALIDATE_DCACHE_COMPLETE))
3006                         break;
3007                 udelay(1);
3008         }
3009
3010         if (i >= usec_timeout) {
3011                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3012                 return -EINVAL;
3013         }
3014
3015         return 0;
3016 }
3017
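/*
 * Halt the gfx CP, then load PFP and ME with either the RS64 or the
 * legacy loader depending on the microcode flavor in use.
 */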
3018 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3019 {
3020         int r;
3021
3022         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3023                 return -EINVAL;
3024
3025         gfx_v11_0_cp_gfx_enable(adev, false);
3026
3027         if (adev->gfx.rs64_enable)
3028                 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3029         else
3030                 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3031         if (r) {
3032                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3033                 return r;
3034         }
3035
3036         if (adev->gfx.rs64_enable)
3037                 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3038         else
3039                 r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3040         if (r) {
3041                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3042                 return r;
3043         }
3044
3045         return 0;
3046 }
3047
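/*
 * Emit the clear-state preamble on gfx ring 0: the golden context
 * registers from gfx11_cs_data plus the tile steering override,
 * finished with a CLEAR_STATE packet.  A second ring, when present,
 * only needs CLEAR_STATE to copy context 0 into the next free state.
 */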
3048 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3049 {
3050         struct amdgpu_ring *ring;
3051         const struct cs_section_def *sect = NULL;
3052         const struct cs_extent_def *ext = NULL;
3053         int r, i;
3054         int ctx_reg_offset;
3055
3056         /* init the CP */
3057         WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3058                      adev->gfx.config.max_hw_contexts - 1);
3059         WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3060
3061         if (!amdgpu_async_gfx_ring)
3062                 gfx_v11_0_cp_gfx_enable(adev, true);
3063
3064         ring = &adev->gfx.gfx_ring[0];
3065         r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3066         if (r) {
3067                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3068                 return r;
3069         }
3070
3071         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3072         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3073
3074         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3075         amdgpu_ring_write(ring, 0x80000000);
3076         amdgpu_ring_write(ring, 0x80000000);
3077
3078         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3079                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3080                         if (sect->id == SECT_CONTEXT) {
3081                                 amdgpu_ring_write(ring,
3082                                                   PACKET3(PACKET3_SET_CONTEXT_REG,
3083                                                           ext->reg_count));
3084                                 amdgpu_ring_write(ring, ext->reg_index -
3085                                                   PACKET3_SET_CONTEXT_REG_START);
3086                                 for (i = 0; i < ext->reg_count; i++)
3087                                         amdgpu_ring_write(ring, ext->extent[i]);
3088                         }
3089                 }
3090         }
3091
3092         ctx_reg_offset =
3093                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3094         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3095         amdgpu_ring_write(ring, ctx_reg_offset);
3096         amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3097
3098         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3099         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3100
3101         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3102         amdgpu_ring_write(ring, 0);
3103
3104         amdgpu_ring_commit(ring);
3105
3106         /* submit cs packet to copy state 0 to next available state */
3107         if (adev->gfx.num_gfx_rings > 1) {
3108                 /* maximum supported gfx ring is 2 */
3109                 ring = &adev->gfx.gfx_ring[1];
3110                 r = amdgpu_ring_alloc(ring, 2);
3111                 if (r) {
3112                         DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3113                         return r;
3114                 }
3115
3116                 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3117                 amdgpu_ring_write(ring, 0);
3118
3119                 amdgpu_ring_commit(ring);
3120         }
3121         return 0;
3122 }
3123
3124 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3125                                          CP_PIPE_ID pipe)
3126 {
3127         u32 tmp;
3128
3129         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3130         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3131
3132         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3133 }
3134
3135 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3136                                           struct amdgpu_ring *ring)
3137 {
3138         u32 tmp;
3139
3140         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3141         if (ring->use_doorbell) {
3142                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3143                                     DOORBELL_OFFSET, ring->doorbell_index);
3144                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3145                                     DOORBELL_EN, 1);
3146         } else {
3147                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3148                                     DOORBELL_EN, 0);
3149         }
3150         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3151
3152         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3153                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
3154         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3155
3156         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3157                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3158 }
3159
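/*
 * Program the ring buffer registers (size, rptr/wptr writeback
 * addresses, base and doorbell) for gfx ring 0 on pipe 0 and, when a
 * second ring exists, ring 1 on pipe 1, then kick off the clear-state
 * submission via gfx_v11_0_cp_gfx_start().
 */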
3160 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3161 {
3162         struct amdgpu_ring *ring;
3163         u32 tmp;
3164         u32 rb_bufsz;
3165         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3166         u32 i;
3167
3168         /* Set the write pointer delay */
3169         WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3170
3171         /* set the RB to use vmid 0 */
3172         WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3173
3174         /* Init gfx ring 0 for pipe 0 */
3175         mutex_lock(&adev->srbm_mutex);
3176         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3177
3178         /* Set ring buffer size */
3179         ring = &adev->gfx.gfx_ring[0];
3180         rb_bufsz = order_base_2(ring->ring_size / 8);
3181         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3182         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3183         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3184
3185         /* Initialize the ring buffer's write pointers */
3186         ring->wptr = 0;
3187         WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3188         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3189
3190         /* set the wb address whether it's enabled or not */
3191         rptr_addr = ring->rptr_gpu_addr;
3192         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3193         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3194                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3195
3196         wptr_gpu_addr = ring->wptr_gpu_addr;
3197         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3198                      lower_32_bits(wptr_gpu_addr));
3199         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3200                      upper_32_bits(wptr_gpu_addr));
3201
3202         mdelay(1);
3203         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3204
3205         rb_addr = ring->gpu_addr >> 8;
3206         WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3207         WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3208
3209         WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3210
3211         gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3212         mutex_unlock(&adev->srbm_mutex);
3213
3214         /* Init gfx ring 1 for pipe 1 */
3215         if (adev->gfx.num_gfx_rings > 1) {
3216                 mutex_lock(&adev->srbm_mutex);
3217                 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3218                 /* maximum supported gfx ring is 2 */
3219                 ring = &adev->gfx.gfx_ring[1];
3220                 rb_bufsz = order_base_2(ring->ring_size / 8);
3221                 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3222                 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3223                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3224                 /* Initialize the ring buffer's write pointers */
3225                 ring->wptr = 0;
3226                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3227                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3228                 /* Set the wb address whether it's enabled or not */
3229                 rptr_addr = ring->rptr_gpu_addr;
3230                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3231                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3232                              CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3233                 wptr_gpu_addr = ring->wptr_gpu_addr;
3234                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3235                              lower_32_bits(wptr_gpu_addr));
3236                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3237                              upper_32_bits(wptr_gpu_addr));
3238
3239                 mdelay(1);
3240                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3241
3242                 rb_addr = ring->gpu_addr >> 8;
3243                 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3244                 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3245                 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3246
3247                 gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3248                 mutex_unlock(&adev->srbm_mutex);
3249         }
3250         /* Switch to pipe 0 */
3251         mutex_lock(&adev->srbm_mutex);
3252         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3253         mutex_unlock(&adev->srbm_mutex);
3254
3255         /* start the ring */
3256         gfx_v11_0_cp_gfx_start(adev);
3257
3258         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3259                 ring = &adev->gfx.gfx_ring[i];
3260                 ring->sched.ready = true;
3261         }
3262
3263         return 0;
3264 }
3265
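/*
 * Bring the MEC out of (or put it into) halt.  RS64 and F32 microcode
 * use different control registers; on the F32 path MEC2 is left halted
 * when the MES KIQ owns it.
 */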
3266 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3267 {
3268         u32 data;
3269
3270         if (adev->gfx.rs64_enable) {
3271                 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3272                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3273                                                          enable ? 0 : 1);
3274                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3275                                                          enable ? 0 : 1);
3276                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3277                                                          enable ? 0 : 1);
3278                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3279                                                          enable ? 0 : 1);
3280                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3281                                                          enable ? 0 : 1);
3282                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3283                                                          enable ? 1 : 0);
3284                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3285                                                          enable ? 1 : 0);
3286                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3287                                                          enable ? 1 : 0);
3288                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3289                                                          enable ? 1 : 0);
3290                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3291                                                          enable ? 0 : 1);
3292                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3293         } else {
3294                 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3295
3296                 if (enable) {
3297                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3298                         if (!adev->enable_mes_kiq)
3299                                 data = REG_SET_FIELD(data, CP_MEC_CNTL,
3300                                                      MEC_ME2_HALT, 0);
3301                 } else {
3302                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3303                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3304                 }
3305                 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3306         }
3307
3308         adev->gfx.kiq.ring.sched.ready = enable;
3309
3310         udelay(50);
3311 }
3312
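/*
 * Legacy (F32) MEC load: stage the microcode in a GTT buffer and write
 * the jump table through CP_MEC_ME1_UCODE_ADDR/DATA.
 */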
3313 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3314 {
3315         const struct gfx_firmware_header_v1_0 *mec_hdr;
3316         const __le32 *fw_data;
3317         unsigned i, fw_size;
3318         u32 *fw = NULL;
3319         int r;
3320
3321         if (!adev->gfx.mec_fw)
3322                 return -EINVAL;
3323
3324         gfx_v11_0_cp_compute_enable(adev, false);
3325
3326         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3327         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3328
3329         fw_data = (const __le32 *)
3330                 (adev->gfx.mec_fw->data +
3331                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3332         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3333
3334         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3335                                           PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3336                                           &adev->gfx.mec.mec_fw_obj,
3337                                           &adev->gfx.mec.mec_fw_gpu_addr,
3338                                           (void **)&fw);
3339         if (r) {
3340                 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3341                 gfx_v11_0_mec_fini(adev);
3342                 return r;
3343         }
3344
3345         memcpy(fw, fw_data, fw_size);
3346
3347         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3348         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3349
3350         gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3351
3352         /* MEC1 */
3353         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3354
3355         for (i = 0; i < mec_hdr->jt_size; i++)
3356                 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3357                              le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3358
3359         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3360
3361         return 0;
3362 }
3363
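/*
 * RS64 MEC load: separate 64KB-aligned VRAM buffers for instruction and
 * data segments, programmed once per MEC pipe, followed by explicit
 * data- and instruction-cache invalidations.
 */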
3364 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3365 {
3366         const struct gfx_firmware_header_v2_0 *mec_hdr;
3367         const __le32 *fw_ucode, *fw_data;
3368         u32 tmp, fw_ucode_size, fw_data_size;
3369         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3370         u32 *fw_ucode_ptr, *fw_data_ptr;
3371         int r;
3372
3373         if (!adev->gfx.mec_fw)
3374                 return -EINVAL;
3375
3376         gfx_v11_0_cp_compute_enable(adev, false);
3377
3378         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3379         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3380
3381         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3382                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
3383         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3384
3385         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3386                                 le32_to_cpu(mec_hdr->data_offset_bytes));
3387         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3388
3389         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3390                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3391                                       &adev->gfx.mec.mec_fw_obj,
3392                                       &adev->gfx.mec.mec_fw_gpu_addr,
3393                                       (void **)&fw_ucode_ptr);
3394         if (r) {
3395                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3396                 gfx_v11_0_mec_fini(adev);
3397                 return r;
3398         }
3399
3400         r = amdgpu_bo_create_reserved(adev, fw_data_size,
3401                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3402                                       &adev->gfx.mec.mec_fw_data_obj,
3403                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
3404                                       (void **)&fw_data_ptr);
3405         if (r) {
3406                 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3407                 gfx_v11_0_mec_fini(adev);
3408                 return r;
3409         }
3410
3411         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3412         memcpy(fw_data_ptr, fw_data, fw_data_size);
3413
3414         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3415         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3416         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3417         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3418
3419         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3420         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3421         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3422         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3423         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3424
3425         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3426         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3427         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3428         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3429
3430         mutex_lock(&adev->srbm_mutex);
3431         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3432                 soc21_grbm_select(adev, 1, i, 0, 0);
3433
3434                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3435                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3436                      upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3437
3438                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3439                                         mec_hdr->ucode_start_addr_lo >> 2 |
3440                                         mec_hdr->ucode_start_addr_hi << 30);
3441                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3442                                         mec_hdr->ucode_start_addr_hi >> 2);
3443
3444                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3445                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3446                      upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3447         }
3448         soc21_grbm_select(adev, 0, 0, 0, 0);
3449         mutex_unlock(&adev->srbm_mutex);
3450
3451         /* Trigger an invalidation of the MEC data cache */
3452         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3453         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3454         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3455
3456         /* Wait for invalidation complete */
3457         for (i = 0; i < usec_timeout; i++) {
3458                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3459                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3460                                        INVALIDATE_DCACHE_COMPLETE))
3461                         break;
3462                 udelay(1);
3463         }
3464
3465         if (i >= usec_timeout) {
3466                 dev_err(adev->dev, "failed to invalidate MEC data cache\n");
3467                 return -EINVAL;
3468         }
3469
3470         /* Trigger an invalidation of the L1 instruction caches */
3471         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3472         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3473         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3474
3475         /* Wait for invalidation complete */
3476         for (i = 0; i < usec_timeout; i++) {
3477                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3478                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3479                                        INVALIDATE_CACHE_COMPLETE))
3480                         break;
3481                 udelay(1);
3482         }
3483
3484         if (i >= usec_timeout) {
3485                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
3486                 return -EINVAL;
3487         }
3488
3489         return 0;
3490 }
3491
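/*
 * The low byte of RLC_CP_SCHEDULERS encodes the KIQ's me/pipe/queue;
 * the second write additionally sets 0x80, presumably the queue-active
 * flag for the scheduler entry.
 */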
3492 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3493 {
3494         uint32_t tmp;
3495         struct amdgpu_device *adev = ring->adev;
3496
3497         /* tell RLC which is KIQ queue */
3498         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3499         tmp &= 0xffffff00;
3500         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3501         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3502         tmp |= 0x80;
3503         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3504 }
3505
3506 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3507 {
3508         /* set graphics engine doorbell range */
3509         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3510                      (adev->doorbell_index.gfx_ring0 * 2) << 2);
3511         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3512                      (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3513
3514         /* set compute engine doorbell range */
3515         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3516                      (adev->doorbell_index.kiq * 2) << 2);
3517         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3518                      (adev->doorbell_index.userqueue_end * 2) << 2);
3519 }
3520
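/*
 * Fill a v11 gfx MQD from the generic queue properties.  The fields
 * mirror the CP_GFX_HQD_* / CP_RB_* registers the queue would otherwise
 * be programmed with directly; the descriptor is later loaded into the
 * hardware when the queue is mapped.
 */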
3521 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3522                                   struct amdgpu_mqd_prop *prop)
3523 {
3524         struct v11_gfx_mqd *mqd = m;
3525         uint64_t hqd_gpu_addr, wb_gpu_addr;
3526         uint32_t tmp;
3527         uint32_t rb_bufsz;
3528
3529         /* set up gfx hqd wptr */
3530         mqd->cp_gfx_hqd_wptr = 0;
3531         mqd->cp_gfx_hqd_wptr_hi = 0;
3532
3533         /* set the pointer to the MQD */
3534         mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3535         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3536
3537         /* set up mqd control */
3538         tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3539         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3540         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3541         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3542         mqd->cp_gfx_mqd_control = tmp;
3543
3544         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3545         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3546         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3547         mqd->cp_gfx_hqd_vmid = 0;
3548
3549         /* set up default queue priority level
3550          * 0x0 = low priority, 0x1 = high priority */
3551         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3552         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3553         mqd->cp_gfx_hqd_queue_priority = tmp;
3554
3555         /* set up time quantum */
3556         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3557         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3558         mqd->cp_gfx_hqd_quantum = tmp;
3559
3560         /* set up gfx hqd base. this is similar to CP_RB_BASE */
3561         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3562         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3563         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3564
3565         /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
3566         wb_gpu_addr = prop->rptr_gpu_addr;
3567         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3568         mqd->cp_gfx_hqd_rptr_addr_hi =
3569                 upper_32_bits(wb_gpu_addr) & 0xffff;
3570
3571         /* set up rb_wptr_poll addr */
3572         wb_gpu_addr = prop->wptr_gpu_addr;
3573         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3574         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3575
3576         /* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
3577         rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3578         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3579         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3580         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3581 #ifdef __BIG_ENDIAN
3582         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3583 #endif
3584         mqd->cp_gfx_hqd_cntl = tmp;
3585
3586         /* set up cp_doorbell_control */
3587         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3588         if (prop->use_doorbell) {
3589                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3590                                     DOORBELL_OFFSET, prop->doorbell_index);
3591                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3592                                     DOORBELL_EN, 1);
3593         } else
3594                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3595                                     DOORBELL_EN, 0);
3596         mqd->cp_rb_doorbell_control = tmp;
3597
3598         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3599         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3600
3601         /* activate the queue */
3602         mqd->cp_gfx_hqd_active = 1;
3603
3604         return 0;
3605 }
3606
3607 #ifdef BRING_UP_DEBUG
3608 static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
3609 {
3610         struct amdgpu_device *adev = ring->adev;
3611         struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3612
3613         /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3614         WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
3615         WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
3616
3617         /* set GFX_MQD_BASE */
3618         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
3619         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3620
3621         /* set GFX_MQD_CONTROL */
3622         WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
3623
3624         /* set GFX_HQD_VMID to 0 */
3625         WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
3626
3627         WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
3628                         mqd->cp_gfx_hqd_queue_priority);
3629         WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
3630
3631         /* set GFX_HQD_BASE, similar to CP_RB_BASE */
3632         WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
3633         WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
3634
3635         /* set GFX_HQD_RPTR_ADDR, similar to CP_RB_RPTR */
3636         WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
3637         WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
3638
3639         /* set GFX_HQD_CNTL, similar to CP_RB_CNTL */
3640         WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
3641
3642         /* set RB_WPTR_POLL_ADDR */
3643         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
3644         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
3645
3646         /* set RB_DOORBELL_CONTROL */
3647         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
3648
3649         /* activate the queue */
3650         WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
3651
3652         return 0;
3653 }
3654 #endif
3655
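/*
 * Initialize the gfx ring's MQD (or restore it from the backup copy
 * when coming out of reset) under the SRBM mutex with the ring's
 * me/pipe/queue selected.
 */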
3656 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3657 {
3658         struct amdgpu_device *adev = ring->adev;
3659         struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3660         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3661
3662         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3663                 memset((void *)mqd, 0, sizeof(*mqd));
3664                 mutex_lock(&adev->srbm_mutex);
3665                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3666                 amdgpu_ring_init_mqd(ring);
3667 #ifdef BRING_UP_DEBUG
3668                 gfx_v11_0_gfx_queue_init_register(ring);
3669 #endif
3670                 soc21_grbm_select(adev, 0, 0, 0, 0);
3671                 mutex_unlock(&adev->srbm_mutex);
3672                 if (adev->gfx.me.mqd_backup[mqd_idx])
3673                         memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3674         } else if (amdgpu_in_reset(adev)) {
3675                 /* reset mqd with the backup copy */
3676                 if (adev->gfx.me.mqd_backup[mqd_idx])
3677                         memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3678                 /* reset the ring */
3679                 ring->wptr = 0;
3680                 *ring->wptr_cpu_addr = 0;
3681                 amdgpu_ring_clear_ring(ring);
3682 #ifdef BRING_UP_DEBUG
3683                 mutex_lock(&adev->srbm_mutex);
3684                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3685                 gfx_v11_0_gfx_queue_init_register(ring);
3686                 soc21_grbm_select(adev, 0, 0, 0, 0);
3687                 mutex_unlock(&adev->srbm_mutex);
3688 #endif
3689         } else {
3690                 amdgpu_ring_clear_ring(ring);
3691         }
3692
3693         return 0;
3694 }
3695
3696 #ifndef BRING_UP_DEBUG
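/*
 * Ask the KIQ to map every kernel gfx queue: one map_queues packet per
 * ring, then a KIQ ring test to confirm the submission went through.
 */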
3697 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
3698 {
3699         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3700         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3701         int r, i;
3702
3703         if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
3704                 return -EINVAL;
3705
3706         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
3707                                         adev->gfx.num_gfx_rings);
3708         if (r) {
3709                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3710                 return r;
3711         }
3712
3713         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3714                 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
3715
3716         return amdgpu_ring_test_helper(kiq_ring);
3717 }
3718 #endif
3719
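/*
 * Initialize the MQD of every gfx ring, map the rings through the KIQ,
 * then emit the clear-state preamble and mark the rings' schedulers
 * ready.
 */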
3720 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3721 {
3722         int r, i;
3723         struct amdgpu_ring *ring;
3724
3725         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3726                 ring = &adev->gfx.gfx_ring[i];
3727
3728                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3729                 if (unlikely(r != 0))
3730                         goto done;
3731
3732                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3733                 if (!r) {
3734                         r = gfx_v11_0_gfx_init_queue(ring);
3735                         amdgpu_bo_kunmap(ring->mqd_obj);
3736                         ring->mqd_ptr = NULL;
3737                 }
3738                 amdgpu_bo_unreserve(ring->mqd_obj);
3739                 if (r)
3740                         goto done;
3741         }
3742 #ifndef BRING_UP_DEBUG
3743         r = gfx_v11_0_kiq_enable_kgq(adev);
3744         if (r)
3745                 goto done;
3746 #endif
3747         r = gfx_v11_0_cp_gfx_start(adev);
3748         if (r)
3749                 goto done;
3750
3751         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3752                 ring = &adev->gfx.gfx_ring[i];
3753                 ring->sched.ready = true;
3754         }
3755 done:
3756         return r;
3757 }
3758
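/*
 * Fill a v11 compute MQD: EOP buffer, doorbell control, MQD base and
 * the CP_HQD_PQ_* queue state.  The wptr/rptr fields start out zeroed
 * so the queue comes up idle until it is mapped.
 */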
3759 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3760                                       struct amdgpu_mqd_prop *prop)
3761 {
3762         struct v11_compute_mqd *mqd = m;
3763         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3764         uint32_t tmp;
3765
3766         mqd->header = 0xC0310800;
3767         mqd->compute_pipelinestat_enable = 0x00000001;
3768         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3769         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3770         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3771         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3772         mqd->compute_misc_reserved = 0x00000007;
3773
3774         eop_base_addr = prop->eop_gpu_addr >> 8;
3775         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3776         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3777
3778         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3779         tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3780         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3781                         (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3782
3783         mqd->cp_hqd_eop_control = tmp;
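             /*
              * Worked example: GFX11_MEC_HPD_SIZE is 2048 bytes, i.e. 512
              * dwords, so order_base_2(512) - 1 = 8 and the CP decodes
              * 2^(8+1) = 512 dwords, matching the allocated EOP buffer.
              */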
3784
3785         /* enable doorbell? */
3786         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3787
3788         if (prop->use_doorbell) {
3789                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3790                                     DOORBELL_OFFSET, prop->doorbell_index);
3791                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3792                                     DOORBELL_EN, 1);
3793                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3794                                     DOORBELL_SOURCE, 0);
3795                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3796                                     DOORBELL_HIT, 0);
3797         } else {
3798                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3799                                     DOORBELL_EN, 0);
3800         }
3801
3802         mqd->cp_hqd_pq_doorbell_control = tmp;
3803
3804         /* disable the queue if it's active */
3805         mqd->cp_hqd_dequeue_request = 0;
3806         mqd->cp_hqd_pq_rptr = 0;
3807         mqd->cp_hqd_pq_wptr_lo = 0;
3808         mqd->cp_hqd_pq_wptr_hi = 0;
3809
3810         /* set the pointer to the MQD */
3811         mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3812         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3813
3814         /* set MQD vmid to 0 */
3815         tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3816         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3817         mqd->cp_mqd_control = tmp;
3818
3819         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3820         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3821         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3822         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3823
3824         /* set up the HQD, this is similar to CP_RB0_CNTL */
3825         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3826         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3827                             (order_base_2(prop->queue_size / 4) - 1));
3828         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3829                             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3830         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3831         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3832         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3833         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3834         mqd->cp_hqd_pq_control = tmp;
3835
3836         /* set the wb address whether it's enabled or not */
3837         wb_gpu_addr = prop->rptr_gpu_addr;
3838         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3839         mqd->cp_hqd_pq_rptr_report_addr_hi =
3840                 upper_32_bits(wb_gpu_addr) & 0xffff;
3841
3842         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3843         wb_gpu_addr = prop->wptr_gpu_addr;
3844         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3845         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3846
3847         tmp = 0;
3848         /* enable the doorbell if requested */
3849         if (prop->use_doorbell) {
3850                 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3851                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3852                                 DOORBELL_OFFSET, prop->doorbell_index);
3853
3854                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3855                                     DOORBELL_EN, 1);
3856                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3857                                     DOORBELL_SOURCE, 0);
3858                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3859                                     DOORBELL_HIT, 0);
3860         }
3861
3862         mqd->cp_hqd_pq_doorbell_control = tmp;
3863
3864         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3865         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3866
3867         /* set the vmid for the queue */
3868         mqd->cp_hqd_vmid = 0;
3869
3870         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3871         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3872         mqd->cp_hqd_persistent_state = tmp;
3873
3874         /* set MIN_IB_AVAIL_SIZE */
3875         tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3876         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3877         mqd->cp_hqd_ib_control = tmp;
3878
3879         /* set static priority for a compute queue/ring */
3880         mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3881         mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3882
3883         mqd->cp_hqd_active = prop->hqd_active;
3884
3885         return 0;
3886 }
3887
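/*
 * Program the HQD registers for the KIQ from its MQD. Unlike user queues,
 * the KIQ is set up with direct register writes; callers hold srbm_mutex
 * with the right me/pipe/queue already selected in GRBM_GFX_CNTL.
 */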
3888 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
3889 {
3890         struct amdgpu_device *adev = ring->adev;
3891         struct v11_compute_mqd *mqd = ring->mqd_ptr;
3892         int j;
3893
3894         /* inactivate the queue */
3895         if (amdgpu_sriov_vf(adev))
3896                 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3897
3898         /* disable wptr polling */
3899         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3900
3901         /* write the EOP addr */
3902         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3903                mqd->cp_hqd_eop_base_addr_lo);
3904         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3905                mqd->cp_hqd_eop_base_addr_hi);
3906
3907         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3908         WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3909                mqd->cp_hqd_eop_control);
3910
3911         /* enable doorbell? */
3912         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3913                mqd->cp_hqd_pq_doorbell_control);
3914
3915         /* disable the queue if it's active */
3916         if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3917                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3918                 for (j = 0; j < adev->usec_timeout; j++) {
3919                         if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3920                                 break;
3921                         udelay(1);
3922                 }
3923                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3924                        mqd->cp_hqd_dequeue_request);
3925                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3926                        mqd->cp_hqd_pq_rptr);
3927                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3928                        mqd->cp_hqd_pq_wptr_lo);
3929                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3930                        mqd->cp_hqd_pq_wptr_hi);
3931         }
3932
3933         /* set the pointer to the MQD */
3934         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3935                mqd->cp_mqd_base_addr_lo);
3936         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3937                mqd->cp_mqd_base_addr_hi);
3938
3939         /* set MQD vmid to 0 */
3940         WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3941                mqd->cp_mqd_control);
3942
3943         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3944         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3945                mqd->cp_hqd_pq_base_lo);
3946         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3947                mqd->cp_hqd_pq_base_hi);
3948
3949         /* set up the HQD, this is similar to CP_RB0_CNTL */
3950         WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3951                mqd->cp_hqd_pq_control);
3952
3953         /* set the wb address whether it's enabled or not */
3954         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3955                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3956         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3957                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3958
3959         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3960         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3961                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3962         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3963                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3964
3965         /* enable the doorbell if requested */
3966         if (ring->use_doorbell) {
3967                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3968                         (adev->doorbell_index.kiq * 2) << 2);
3969                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3970                         (adev->doorbell_index.userqueue_end * 2) << 2);
3971         }
3972
3973         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3974                mqd->cp_hqd_pq_doorbell_control);
3975
3976         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3977         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3978                mqd->cp_hqd_pq_wptr_lo);
3979         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3980                mqd->cp_hqd_pq_wptr_hi);
3981
3982         /* set the vmid for the queue */
3983         WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3984
3985         WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3986                mqd->cp_hqd_persistent_state);
3987
3988         /* activate the queue */
3989         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3990                mqd->cp_hqd_active);
3991
3992         if (ring->use_doorbell)
3993                 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3994
3995         return 0;
3996 }
3997
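/*
 * Two paths: on GPU reset the saved MQD backup is restored and the HQD is
 * reprogrammed from it; on first init the MQD is generated from scratch
 * and then snapshotted into the backup for later resets.
 */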
3998 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
3999 {
4000         struct amdgpu_device *adev = ring->adev;
4001         struct v11_compute_mqd *mqd = ring->mqd_ptr;
4002         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4003
4004         gfx_v11_0_kiq_setting(ring);
4005
4006         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4007                 /* reset MQD to a clean status */
4008                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4009                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4010
4011                 /* reset ring buffer */
4012                 ring->wptr = 0;
4013                 amdgpu_ring_clear_ring(ring);
4014
4015                 mutex_lock(&adev->srbm_mutex);
4016                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4017                 gfx_v11_0_kiq_init_register(ring);
4018                 soc21_grbm_select(adev, 0, 0, 0, 0);
4019                 mutex_unlock(&adev->srbm_mutex);
4020         } else {
4021                 memset((void *)mqd, 0, sizeof(*mqd));
4022                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4023                         amdgpu_ring_clear_ring(ring);
4024                 mutex_lock(&adev->srbm_mutex);
4025                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4026                 amdgpu_ring_init_mqd(ring);
4027                 gfx_v11_0_kiq_init_register(ring);
4028                 soc21_grbm_select(adev, 0, 0, 0, 0);
4029                 mutex_unlock(&adev->srbm_mutex);
4030
4031                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4032                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4033         }
4034
4035         return 0;
4036 }
4037
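/*
 * Same scheme as the KIQ, minus the register writes: the KCQ MQDs are only
 * prepared in memory here, and the actual mapping onto HQDs happens later
 * through KIQ packets in amdgpu_gfx_enable_kcq().
 */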
4038 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4039 {
4040         struct amdgpu_device *adev = ring->adev;
4041         struct v11_compute_mqd *mqd = ring->mqd_ptr;
4042         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4043
4044         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4045                 memset((void *)mqd, 0, sizeof(*mqd));
4046                 mutex_lock(&adev->srbm_mutex);
4047                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4048                 amdgpu_ring_init_mqd(ring);
4049                 soc21_grbm_select(adev, 0, 0, 0, 0);
4050                 mutex_unlock(&adev->srbm_mutex);
4051
4052                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4053                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4054         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4055                 /* reset MQD to a clean status */
4056                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4057                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4058
4059                 /* reset ring buffer */
4060                 ring->wptr = 0;
4061                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4062                 amdgpu_ring_clear_ring(ring);
4063         } else {
4064                 amdgpu_ring_clear_ring(ring);
4065         }
4066
4067         return 0;
4068 }
4069
4070 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4071 {
4072         struct amdgpu_ring *ring;
4073         int r;
4074
4075         ring = &adev->gfx.kiq.ring;
4076
4077         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4078         if (unlikely(r != 0))
4079                 return r;
4080
4081         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4082         if (unlikely(r != 0)) {
4083                 amdgpu_bo_unreserve(ring->mqd_obj);
4084                 return r;
4085         }
4086
4087         gfx_v11_0_kiq_init_queue(ring);
4088         amdgpu_bo_kunmap(ring->mqd_obj);
4089         ring->mqd_ptr = NULL;
4090         amdgpu_bo_unreserve(ring->mqd_obj);
4091         ring->sched.ready = true;
4092         return 0;
4093 }
4094
4095 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4096 {
4097         struct amdgpu_ring *ring = NULL;
4098         int r = 0, i;
4099
4100         if (!amdgpu_async_gfx_ring)
4101                 gfx_v11_0_cp_compute_enable(adev, true);
4102
4103         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4104                 ring = &adev->gfx.compute_ring[i];
4105
4106                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4107                 if (unlikely(r != 0))
4108                         goto done;
4109                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4110                 if (!r) {
4111                         r = gfx_v11_0_kcq_init_queue(ring);
4112                         amdgpu_bo_kunmap(ring->mqd_obj);
4113                         ring->mqd_ptr = NULL;
4114                 }
4115                 amdgpu_bo_unreserve(ring->mqd_obj);
4116                 if (r)
4117                         goto done;
4118         }
4119
4120         r = amdgpu_gfx_enable_kcq(adev);
4121 done:
4122         return r;
4123 }
4124
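/*
 * Bring the whole CP block back up. Ordering matters: microcode first (for
 * direct loading), then doorbell ranges, then the KIQ (or MES KIQ), then
 * the compute queues, then the gfx queues, and finally a ring test on
 * every ring.
 */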
4125 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4126 {
4127         int r, i;
4128         struct amdgpu_ring *ring;
4129
4130         if (!(adev->flags & AMD_IS_APU))
4131                 gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4132
4133         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4134                 /* legacy firmware loading */
4135                 r = gfx_v11_0_cp_gfx_load_microcode(adev);
4136                 if (r)
4137                         return r;
4138
4139                 if (adev->gfx.rs64_enable)
4140                         r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4141                 else
4142                         r = gfx_v11_0_cp_compute_load_microcode(adev);
4143                 if (r)
4144                         return r;
4145         }
4146
4147         gfx_v11_0_cp_set_doorbell_range(adev);
4148
4149         if (amdgpu_async_gfx_ring) {
4150                 gfx_v11_0_cp_compute_enable(adev, true);
4151                 gfx_v11_0_cp_gfx_enable(adev, true);
4152         }
4153
4154         if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4155                 r = amdgpu_mes_kiq_hw_init(adev);
4156         else
4157                 r = gfx_v11_0_kiq_resume(adev);
4158         if (r)
4159                 return r;
4160
4161         r = gfx_v11_0_kcq_resume(adev);
4162         if (r)
4163                 return r;
4164
4165         if (!amdgpu_async_gfx_ring) {
4166                 r = gfx_v11_0_cp_gfx_resume(adev);
4167                 if (r)
4168                         return r;
4169         } else {
4170                 r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4171                 if (r)
4172                         return r;
4173         }
4174
4175         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4176                 ring = &adev->gfx.gfx_ring[i];
4177                 r = amdgpu_ring_test_helper(ring);
4178                 if (r)
4179                         return r;
4180         }
4181
4182         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4183                 ring = &adev->gfx.compute_ring[i];
4184                 r = amdgpu_ring_test_helper(ring);
4185                 if (r)
4186                         return r;
4187         }
4188
4189         return 0;
4190 }
4191
4192 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4193 {
4194         gfx_v11_0_cp_gfx_enable(adev, enable);
4195         gfx_v11_0_cp_compute_enable(adev, enable);
4196 }
4197
4198 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4199 {
4200         int r;
4201         bool value;
4202
4203         r = adev->gfxhub.funcs->gart_enable(adev);
4204         if (r)
4205                 return r;
4206
4207         adev->hdp.funcs->flush_hdp(adev, NULL);
4208
4209         value = (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS);
4211
4212         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4213         amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
4214
4215         return 0;
4216 }
4217
4218 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4219 {
4220         u32 tmp;
4221
4222         /* select RS64 */
4223         if (adev->gfx.rs64_enable) {
4224                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4225                 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4226                 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4227
4228                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4229                 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4230                 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4231         }
4232
4233         if (amdgpu_emu_mode == 1)
4234                 msleep(100);
4235 }
4236
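/*
 * Decode GB_ADDR_CONFIG into its cached fields. Each field stores a log2
 * value, e.g. a NUM_PIPES field of 3 means 1 << 3 = 8 pipes; a register
 * value of 0 indicates the config was never programmed and is rejected.
 */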
4237 static int get_gb_addr_config(struct amdgpu_device *adev)
4238 {
4239         u32 gb_addr_config;
4240
4241         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4242         if (gb_addr_config == 0)
4243                 return -EINVAL;
4244
4245         adev->gfx.config.gb_addr_config_fields.num_pkrs =
4246                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4247
4248         adev->gfx.config.gb_addr_config = gb_addr_config;
4249
4250         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4251                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4252                                       GB_ADDR_CONFIG, NUM_PIPES);
4253
4254         adev->gfx.config.max_tile_pipes =
4255                 adev->gfx.config.gb_addr_config_fields.num_pipes;
4256
4257         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4258                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4259                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4260         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4261                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4262                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
4263         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4264                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4265                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4266         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4267                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4268                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4269
4270         return 0;
4271 }
4272
4273 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4274 {
4275         uint32_t data;
4276
4277         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4278         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4279         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4280
4281         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4282         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4283         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4284 }
4285
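/*
 * hw_init has to cope with three firmware load paths: RLC backdoor
 * autoload (the IMU programs the RLC RAM, then autoload is kicked off),
 * direct MMIO loading (IMU ucode is handled here and GPA mode disabled),
 * and PSP loading (firmware already placed by the PSP, so only the
 * autoload-complete wait applies).
 */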
4286 static int gfx_v11_0_hw_init(void *handle)
4287 {
4288         int r;
4289         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4290
4291         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4292                 if (adev->gfx.imu.funcs) {
4293                         /* RLC autoload sequence 1: Program rlc ram */
4294                         if (adev->gfx.imu.funcs->program_rlc_ram)
4295                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
4296                 }
4297                 /* rlc autoload firmware */
4298                 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4299                 if (r)
4300                         return r;
4301         } else {
4302                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4303                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4304                                 if (adev->gfx.imu.funcs->load_microcode)
4305                                         adev->gfx.imu.funcs->load_microcode(adev);
4306                                 if (adev->gfx.imu.funcs->setup_imu)
4307                                         adev->gfx.imu.funcs->setup_imu(adev);
4308                                 if (adev->gfx.imu.funcs->start_imu)
4309                                         adev->gfx.imu.funcs->start_imu(adev);
4310                         }
4311
4312                         /* disable gpa mode in backdoor loading */
4313                         gfx_v11_0_disable_gpa_mode(adev);
4314                 }
4315         }
4316
4317         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4318             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4319                 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4320                 if (r) {
4321                         dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4322                         return r;
4323                 }
4324         }
4325
4326         adev->gfx.is_poweron = true;
4327
4328         if (get_gb_addr_config(adev))
4329                 DRM_WARN("Invalid gb_addr_config!\n");
4330
4331         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4332             adev->gfx.rs64_enable)
4333                 gfx_v11_0_config_gfx_rs64(adev);
4334
4335         r = gfx_v11_0_gfxhub_enable(adev);
4336         if (r)
4337                 return r;
4338
4339         if (!amdgpu_emu_mode)
4340                 gfx_v11_0_init_golden_registers(adev);
4341
4342         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4343             (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4344                 /*
4345                  * For gfx 11, rlc firmware loading relies on the smu firmware
4346                  * being loaded first, so for direct loading the smc ucode has
4347                  * to be loaded here before the rlc.
4348                  */
4349                 if (!(adev->flags & AMD_IS_APU)) {
4350                         r = amdgpu_pm_load_smu_firmware(adev, NULL);
4351                         if (r)
4352                                 return r;
4353                 }
4354         }
4355
4356         gfx_v11_0_constants_init(adev);
4357
4358         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4359                 gfx_v11_0_select_cp_fw_arch(adev);
4360
4361         if (adev->nbio.funcs->gc_doorbell_init)
4362                 adev->nbio.funcs->gc_doorbell_init(adev);
4363
4364         r = gfx_v11_0_rlc_resume(adev);
4365         if (r)
4366                 return r;
4367
4368         /*
4369          * init golden registers and rlc resume may override some registers,
4370          * reconfig them here
4371          */
4372         gfx_v11_0_tcp_harvest(adev);
4373
4374         r = gfx_v11_0_cp_resume(adev);
4375         if (r)
4376                 return r;
4377
4378         return r;
4379 }
4380
4381 #ifndef BRING_UP_DEBUG
4382 static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
4383 {
4384         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4385         struct amdgpu_ring *kiq_ring = &kiq->ring;
4386         int i, r = 0;
4387
4388         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4389                 return -EINVAL;
4390
4391         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
4392                                         adev->gfx.num_gfx_rings))
4393                 return -ENOMEM;
4394
4395         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4396                 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
4397                                            PREEMPT_QUEUES, 0, 0);
4398
4399         if (kiq_ring->sched.ready)
4400                 r = amdgpu_ring_test_helper(kiq_ring);
4401
4402         return r;
4403 }
4404 #endif
4405
4406 static int gfx_v11_0_hw_fini(void *handle)
4407 {
4408         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4409         int r;
4410
4411         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4412         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4413
4414         if (!adev->no_hw_access) {
4415 #ifndef BRING_UP_DEBUG
4416                 if (amdgpu_async_gfx_ring) {
4417                         r = gfx_v11_0_kiq_disable_kgq(adev);
4418                         if (r)
4419                                 DRM_ERROR("KGQ disable failed\n");
4420                 }
4421 #endif
4422                 if (amdgpu_gfx_disable_kcq(adev))
4423                         DRM_ERROR("KCQ disable failed\n");
4424
4425                 amdgpu_mes_kiq_hw_fini(adev);
4426         }
4427
4428         if (amdgpu_sriov_vf(adev))
4429                 /* Skip the steps that disable CPG and clear the KIQ position,
4430                  * so that the CP can perform IDLE-SAVE during the switch.
4431                  * Those steps are needed on gfx9 to avoid a DMAR error, which
4432                  * has not been reproduced on gfx11.
4433                  */
4434                 return 0;
4435
4436         gfx_v11_0_cp_enable(adev, false);
4437         gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4438
4439         adev->gfxhub.funcs->gart_disable(adev);
4440
4441         adev->gfx.is_poweron = false;
4442
4443         return 0;
4444 }
4445
4446 static int gfx_v11_0_suspend(void *handle)
4447 {
4448         return gfx_v11_0_hw_fini(handle);
4449 }
4450
4451 static int gfx_v11_0_resume(void *handle)
4452 {
4453         return gfx_v11_0_hw_init(handle);
4454 }
4455
4456 static bool gfx_v11_0_is_idle(void *handle)
4457 {
4458         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4459
4460         return !REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4461                               GRBM_STATUS, GUI_ACTIVE);
4465 }
4466
4467 static int gfx_v11_0_wait_for_idle(void *handle)
4468 {
4469         unsigned i;
4470         u32 tmp;
4471         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4472
4473         for (i = 0; i < adev->usec_timeout; i++) {
4474                 /* read GRBM_STATUS */
4475                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4476                         GRBM_STATUS__GUI_ACTIVE_MASK;
4477
4478                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4479                         return 0;
4480                 udelay(1);
4481         }
4482         return -ETIMEDOUT;
4483 }
4484
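/*
 * Soft-reset sequence: mask CP interrupts, enter RLC safe mode, request
 * dequeue on every compute and gfx HQD, reset all VMIDs, pulse the GRBM
 * soft-reset bits for CP/GFX/CPF/CPC/CPG, restore interrupts and safe
 * mode, then rebuild the queues with gfx_v11_0_cp_resume().
 */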
4485 static int gfx_v11_0_soft_reset(void *handle)
4486 {
4487         u32 grbm_soft_reset = 0;
4488         u32 tmp;
4489         int i, j, k;
4490         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4491
4492         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4493         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4494         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4495         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4496         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4497         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4498
4499         gfx_v11_0_set_safe_mode(adev);
4500
4501         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4502                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4503                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4504                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4505                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4506                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4507                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4508                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4509
4510                                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4511                                 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4512                         }
4513                 }
4514         }
4515         for (i = 0; i < adev->gfx.me.num_me; ++i) {
4516                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4517                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4518                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4519                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4520                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4521                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4522                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4523
4524                                 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4525                         }
4526                 }
4527         }
4528
4529         WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4530
4531         /* Read the CP_VMID_RESET register three times to give
4532          * GFX_HQD_ACTIVE enough time to reach 0. */
4533         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4534         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4535         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4536
4537         for (i = 0; i < adev->usec_timeout; i++) {
4538                 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4539                     !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4540                         break;
4541                 udelay(1);
4542         }
4543         if (i >= adev->usec_timeout) {
4544                 dev_err(adev->dev, "failed to wait for all pipes to be clean\n");
4545                 return -EINVAL;
4546         }
4547
4548         /**********  trigger soft reset  ***********/
4549         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4550         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4551                                         SOFT_RESET_CP, 1);
4552         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4553                                         SOFT_RESET_GFX, 1);
4554         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4555                                         SOFT_RESET_CPF, 1);
4556         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4557                                         SOFT_RESET_CPC, 1);
4558         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4559                                         SOFT_RESET_CPG, 1);
4560         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4561         /**********  exit soft reset  ***********/
4562         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4563         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4564                                         SOFT_RESET_CP, 0);
4565         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4566                                         SOFT_RESET_GFX, 0);
4567         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4568                                         SOFT_RESET_CPF, 0);
4569         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4570                                         SOFT_RESET_CPC, 0);
4571         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4572                                         SOFT_RESET_CPG, 0);
4573         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4574
4575         tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4576         tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4577         WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4578
4579         WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4580         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4581
4582         for (i = 0; i < adev->usec_timeout; i++) {
4583                 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4584                         break;
4585                 udelay(1);
4586         }
4587         if (i >= adev->usec_timeout) {
4588                 dev_err(adev->dev, "failed to wait for CP_VMID_RESET to clear\n");
4589                 return -EINVAL;
4590         }
4591
4592         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4593         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4594         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4595         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4596         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4597         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4598
4599         gfx_v11_0_unset_safe_mode(adev);
4600
4601         return gfx_v11_0_cp_resume(adev);
4602 }
4603
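/*
 * Probe ring health by running an IB test on every gfx and compute ring
 * with a 1 second timeout; any failure reports that a soft reset is
 * needed.
 */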
4604 static bool gfx_v11_0_check_soft_reset(void *handle)
4605 {
4606         int i, r;
4607         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4608         struct amdgpu_ring *ring;
4609         long tmo = msecs_to_jiffies(1000);
4610
4611         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4612                 ring = &adev->gfx.gfx_ring[i];
4613                 r = amdgpu_ring_test_ib(ring, tmo);
4614                 if (r)
4615                         return true;
4616         }
4617
4618         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4619                 ring = &adev->gfx.compute_ring[i];
4620                 r = amdgpu_ring_test_ib(ring, tmo);
4621                 if (r)
4622                         return true;
4623         }
4624
4625         return false;
4626 }
4627
4628 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4629 {
4630         uint64_t clock;
4631
4632         amdgpu_gfx_off_ctrl(adev, false);
4633         mutex_lock(&adev->gfx.gpu_clock_mutex);
4634         clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
4635                 ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
4636         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4637         amdgpu_gfx_off_ctrl(adev, true);
4638         return clock;
4639 }
4640
4641 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4642                                            uint32_t vmid,
4643                                            uint32_t gds_base, uint32_t gds_size,
4644                                            uint32_t gws_base, uint32_t gws_size,
4645                                            uint32_t oa_base, uint32_t oa_size)
4646 {
4647         struct amdgpu_device *adev = ring->adev;
4648
4649         /* GDS Base */
4650         gfx_v11_0_write_data_to_reg(ring, 0, false,
4651                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4652                                     gds_base);
4653
4654         /* GDS Size */
4655         gfx_v11_0_write_data_to_reg(ring, 0, false,
4656                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4657                                     gds_size);
4658
4659         /* GWS */
4660         gfx_v11_0_write_data_to_reg(ring, 0, false,
4661                                     SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4662                                     gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4663
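        /*
         * The OA allocation is turned into a contiguous bitmask below:
         * e.g. oa_base = 4, oa_size = 2 gives (1 << 6) - (1 << 4) = 0x30,
         * i.e. bits 4 and 5 set.
         */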
4664         /* OA */
4665         gfx_v11_0_write_data_to_reg(ring, 0, false,
4666                                     SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4667                                     (1 << (oa_size + oa_base)) - (1 << oa_base));
4668 }
4669
4670 static int gfx_v11_0_early_init(void *handle)
4671 {
4672         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4673
4674         adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
4675
4676         adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4677         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4678                                           AMDGPU_MAX_COMPUTE_RINGS);
4679
4680         gfx_v11_0_set_kiq_pm4_funcs(adev);
4681         gfx_v11_0_set_ring_funcs(adev);
4682         gfx_v11_0_set_irq_funcs(adev);
4683         gfx_v11_0_set_gds_init(adev);
4684         gfx_v11_0_set_rlc_funcs(adev);
4685         gfx_v11_0_set_mqd_funcs(adev);
4686         gfx_v11_0_set_imu_funcs(adev);
4687
4688         gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4689
4690         return 0;
4691 }
4692
4693 static int gfx_v11_0_ras_late_init(void *handle)
4694 {
4695         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4696         struct ras_common_if *gfx_common_if;
4697         int ret;
4698
4699         gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
4700         if (!gfx_common_if)
4701                 return -ENOMEM;
4702
4703         gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
4704
4705         ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
4706         if (ret)
4707                 dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n");
4708
4709         kfree(gfx_common_if);
4710         return 0;
4711 }
4712
4713 static int gfx_v11_0_late_init(void *handle)
4714 {
4715         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4716         int r;
4717
4718         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4719         if (r)
4720                 return r;
4721
4722         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4723         if (r)
4724                 return r;
4725
4726         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
4727                 r = gfx_v11_0_ras_late_init(handle);
4728                 if (r)
4729                         return r;
4730         }
4731
4732         return 0;
4733 }
4734
4735 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4736 {
4737         uint32_t rlc_cntl;
4738
4739         /* if RLC is not enabled, do nothing */
4740         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4741         return REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32);
4742 }
4743
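/*
 * Request RLC safe mode via a message write to RLC_SAFE_MODE: the RLC
 * clears the CMD bit once it has acknowledged the request, so the write
 * is followed by a busy-wait on that bit.
 */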
4744 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
4745 {
4746         uint32_t data;
4747         unsigned i;
4748
4749         data = RLC_SAFE_MODE__CMD_MASK;
4750         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4751
4752         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4753
4754         /* wait for RLC_SAFE_MODE */
4755         for (i = 0; i < adev->usec_timeout; i++) {
4756                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4757                                    RLC_SAFE_MODE, CMD))
4758                         break;
4759                 udelay(1);
4760         }
4761 }
4762
4763 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
4764 {
4765         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4766 }
4767
4768 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4769                                       bool enable)
4770 {
4771         uint32_t def, data;
4772
4773         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4774                 return;
4775
4776         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4777
4778         if (enable)
4779                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4780         else
4781                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4782
4783         if (def != data)
4784                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4785 }
4786
4787 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4788                                        bool enable)
4789 {
4790         uint32_t def, data;
4791
4792         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4793                 return;
4794
4795         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4796
4797         if (enable)
4798                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4799         else
4800                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4801
4802         if (def != data)
4803                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4804 }
4805
4806 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4807                                            bool enable)
4808 {
4809         uint32_t def, data;
4810
4811         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4812                 return;
4813
4814         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4815
4816         if (enable)
4817                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4818         else
4819                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4820
4821         if (def != data)
4822                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4823 }
4824
4825 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4826                                                        bool enable)
4827 {
4828         uint32_t data, def;
4829
4830         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4831                 return;
4832
4833         /* It is disabled by HW by default */
4834         if (enable) {
4835                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4836                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4837                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4838
4839                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4840                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4841                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4842
4843                         if (def != data)
4844                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4845                 }
4846         } else {
4847                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4848                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4849
4850                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4851                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4852                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4853
4854                         if (def != data)
4855                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4856                 }
4857         }
4858 }
4859
4860 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4861                                                        bool enable)
4862 {
4863         uint32_t def, data;
4864
4865         if (!(adev->cg_flags &
4866               (AMD_CG_SUPPORT_GFX_CGCG |
4867               AMD_CG_SUPPORT_GFX_CGLS |
4868               AMD_CG_SUPPORT_GFX_3D_CGCG |
4869               AMD_CG_SUPPORT_GFX_3D_CGLS)))
4870                 return;
4871
4872         if (enable) {
4873                 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4874
4875                 /* unset CGCG override */
4876                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4877                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4878                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4879                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4880                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4881                     adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4882                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4883
4884                 /* update CGCG override bits */
4885                 if (def != data)
4886                         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4887
4888                 /* enable cgcg FSM(0x0000363F) */
4889                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4890
4891                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4892                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4893                         data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4894                                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4895                 }
4896
4897                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4898                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4899                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4900                                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4901                 }
4902
4903                 if (def != data)
4904                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4905
4906                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4907                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4908
4909                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
4910                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
4911                         data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4912                                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4913                 }
4914
4915                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
4916                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
4917                         data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4918                                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4919                 }
4920
4921                 if (def != data)
4922                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4923
4924                 /* set IDLE_POLL_COUNT(0x00900100) */
4925                 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
4926
4927                 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
4928                 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4929                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4930
4931                 if (def != data)
4932                         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
4933
4934                 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4935                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4936                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4937                 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4938                 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4939                 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
4940
4941                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4942                 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4943                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4944
4945                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
4946                 if (adev->sdma.num_instances > 1) {
4947                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4948                         data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4949                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4950                 }
4951         } else {
4952                 /* Program RLC_CGCG_CGLS_CTRL */
4953                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4954
4955                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4956                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4957
4958                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4959                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4960
4961                 if (def != data)
4962                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4963
4964                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4965                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4966
4967                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4968                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4969                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4970                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4971
4972                 if (def != data)
4973                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4974
4975                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4976                 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4977                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4978
4979                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
4980                 if (adev->sdma.num_instances > 1) {
4981                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4982                         data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4983                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4984                 }
4985         }
4986 }
4987
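/*
 * Flip every gfx clock-gating feature while parked in RLC safe mode, so
 * the RLC is quiesced while the override registers are reprogrammed.
 */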
4988 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4989                                             bool enable)
4990 {
4991         amdgpu_gfx_rlc_enter_safe_mode(adev);
4992
4993         gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
4994
4995         gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
4996
4997         gfx_v11_0_update_repeater_fgcg(adev, enable);
4998
4999         gfx_v11_0_update_sram_fgcg(adev, enable);
5000
5001         gfx_v11_0_update_perf_clk(adev, enable);
5002
5003         if (adev->cg_flags &
5004             (AMD_CG_SUPPORT_GFX_MGCG |
5005              AMD_CG_SUPPORT_GFX_CGLS |
5006              AMD_CG_SUPPORT_GFX_CGCG |
5007              AMD_CG_SUPPORT_GFX_3D_CGCG |
5008              AMD_CG_SUPPORT_GFX_3D_CGLS))
5009                 gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5010
5011         amdgpu_gfx_rlc_exit_safe_mode(adev);
5012
5013         return 0;
5014 }
5015
5016 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5017 {
5018         u32 reg, data;
5019
5020         amdgpu_gfx_off_ctrl(adev, false);
5021
5022         reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5023         if (amdgpu_sriov_is_pp_one_vf(adev))
5024                 data = RREG32_NO_KIQ(reg);
5025         else
5026                 data = RREG32(reg);
5027
5028         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5029         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5030
5031         if (amdgpu_sriov_is_pp_one_vf(adev))
5032                 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5033         else
5034                 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5035
5036         amdgpu_gfx_off_ctrl(adev, true);
5037 }
5038
5039 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5040         .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5041         .set_safe_mode = gfx_v11_0_set_safe_mode,
5042         .unset_safe_mode = gfx_v11_0_unset_safe_mode,
5043         .init = gfx_v11_0_rlc_init,
5044         .get_csb_size = gfx_v11_0_get_csb_size,
5045         .get_csb_buffer = gfx_v11_0_get_csb_buffer,
5046         .resume = gfx_v11_0_rlc_resume,
5047         .stop = gfx_v11_0_rlc_stop,
5048         .reset = gfx_v11_0_rlc_reset,
5049         .start = gfx_v11_0_rlc_start,
5050         .update_spm_vmid = gfx_v11_0_update_spm_vmid,
5051 };
5052
5053 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5054 {
5055         u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5056
5057         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5058                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5059         else
5060                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5061
5062         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5063
5064         /* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5065         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5066                 switch (adev->ip_versions[GC_HWIP][0]) {
5067                 case IP_VERSION(11, 0, 1):
5068                 case IP_VERSION(11, 0, 4):
5069                         WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5070                         break;
5071                 default:
5072                         break;
5073                 }
5074         }
5075 }
5076
5077 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5078 {
5079         amdgpu_gfx_rlc_enter_safe_mode(adev);
5080
5081         gfx_v11_cntl_power_gating(adev, enable);
5082
5083         amdgpu_gfx_rlc_exit_safe_mode(adev);
5084 }
5085
5086 static int gfx_v11_0_set_powergating_state(void *handle,
5087                                            enum amd_powergating_state state)
5088 {
5089         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5090         bool enable = (state == AMD_PG_STATE_GATE);
5091
5092         if (amdgpu_sriov_vf(adev))
5093                 return 0;
5094
5095         switch (adev->ip_versions[GC_HWIP][0]) {
5096         case IP_VERSION(11, 0, 0):
5097         case IP_VERSION(11, 0, 2):
5098         case IP_VERSION(11, 0, 3):
5099                 amdgpu_gfx_off_ctrl(adev, enable);
5100                 break;
5101         case IP_VERSION(11, 0, 1):
5102         case IP_VERSION(11, 0, 4):
5103                 gfx_v11_cntl_pg(adev, enable);
5104                 amdgpu_gfx_off_ctrl(adev, enable);
5105                 break;
5106         default:
5107                 break;
5108         }
5109
5110         return 0;
5111 }
5112
5113 static int gfx_v11_0_set_clockgating_state(void *handle,
5114                                           enum amd_clockgating_state state)
5115 {
5116         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5117
5118         if (amdgpu_sriov_vf(adev))
5119                 return 0;
5120
5121         switch (adev->ip_versions[GC_HWIP][0]) {
5122         case IP_VERSION(11, 0, 0):
5123         case IP_VERSION(11, 0, 1):
5124         case IP_VERSION(11, 0, 2):
5125         case IP_VERSION(11, 0, 3):
5126         case IP_VERSION(11, 0, 4):
5127                 gfx_v11_0_update_gfx_clock_gating(adev,
5128                                 state == AMD_CG_STATE_GATE);
5129                 break;
5130         default:
5131                 break;
5132         }
5133
5134         return 0;
5135 }
5136
5137 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5138 {
5139         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5140         int data;
5141
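             /* A set override bit in RLC_CGTT_MGCG_OVERRIDE forces the
              * feature off, so a cleared bit below means the corresponding
              * CG feature is active.
              */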
5142         /* AMD_CG_SUPPORT_GFX_MGCG */
5143         data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5144         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5145                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5146
5147         /* AMD_CG_SUPPORT_REPEATER_FGCG */
5148         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5149                 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5150
5151         /* AMD_CG_SUPPORT_GFX_FGCG */
5152         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5153                 *flags |= AMD_CG_SUPPORT_GFX_FGCG;
5154
5155         /* AMD_CG_SUPPORT_GFX_PERF_CLK */
5156         if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5157                 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5158
5159         /* AMD_CG_SUPPORT_GFX_CGCG */
5160         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5161         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5162                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5163
5164         /* AMD_CG_SUPPORT_GFX_CGLS */
5165         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5166                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5167
5168         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5169         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5170         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5171                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5172
5173         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5174         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5175                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5176 }
5177
5178 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5179 {
5180         /* gfx11 hardware uses a 32bit rptr */
5181         return *(uint32_t *)ring->rptr_cpu_addr;
5182 }
5183
5184 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5185 {
5186         struct amdgpu_device *adev = ring->adev;
5187         u64 wptr;
5188
5189         /* XXX check if swapping is necessary on BE */
5190         if (ring->use_doorbell) {
5191                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5192         } else {
5193                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5194                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5195         }
5196
5197         return wptr;
5198 }
5199
5200 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5201 {
5202         struct amdgpu_device *adev = ring->adev;
5203         uint32_t *wptr_saved;
5204         uint32_t *is_queue_unmap;
5205         uint64_t aggregated_db_index;
5206         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
5207         uint64_t wptr_tmp;
5208
5209         if (ring->is_mes_queue) {
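                     /* Two extra dwords live right after the MQD for
                      * MES-managed queues: the saved wptr and a flag
                      * (apparently maintained by MES) telling whether the
                      * queue has been unmapped.
                      */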
5210                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5211                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5212                                               sizeof(uint32_t));
5213                 aggregated_db_index =
5214                         amdgpu_mes_get_aggregated_doorbell_index(adev,
5215                                                                  ring->hw_prio);
5216
5217                 wptr_tmp = ring->wptr & ring->buf_mask;
5218                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5219                 *wptr_saved = wptr_tmp;
5220                 /* assume the doorbell is always used by MES mapped queues */
5221                 if (*is_queue_unmap) {
5222                         WDOORBELL64(aggregated_db_index, wptr_tmp);
5223                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5224                 } else {
5225                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5226                 }
5230         } else {
5231                 if (ring->use_doorbell) {
5232                         /* XXX check if swapping is necessary on BE */
5233                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5234                                      ring->wptr);
5235                         WDOORBELL64(ring->doorbell_index, ring->wptr);
5236                 } else {
5237                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5238                                      lower_32_bits(ring->wptr));
5239                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5240                                      upper_32_bits(ring->wptr));
5241                 }
5242         }
5243 }
5244
5245 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5246 {
5247         /* gfx11 hardware uses a 32bit rptr */
5248         return *(uint32_t *)ring->rptr_cpu_addr;
5249 }
5250
5251 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5252 {
5253         u64 wptr;
5254
5255         /* XXX check if swapping is necessary on BE */
5256         if (ring->use_doorbell)
5257                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5258         else
5259                 BUG();
5260         return wptr;
5261 }
5262
5263 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5264 {
5265         struct amdgpu_device *adev = ring->adev;
5266         uint32_t *wptr_saved;
5267         uint32_t *is_queue_unmap;
5268         uint64_t aggregated_db_index;
5269         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
5270         uint64_t wptr_tmp;
5271
5272         if (ring->is_mes_queue) {
5273                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5274                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5275                                               sizeof(uint32_t));
5276                 aggregated_db_index =
5277                         amdgpu_mes_get_aggregated_doorbell_index(adev,
5278                                                                  ring->hw_prio);
5279
5280                 wptr_tmp = ring->wptr & ring->buf_mask;
5281                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5282                 *wptr_saved = wptr_tmp;
5283                 /* assume the doorbell is always used by MES mapped queues */
5284                 if (*is_queue_unmap) {
5285                         WDOORBELL64(aggregated_db_index, wptr_tmp);
5286                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5287                 } else {
5288                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5289                 }
5293         } else {
5294                 /* XXX check if swapping is necessary on BE */
5295                 if (ring->use_doorbell) {
5296                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5297                                      ring->wptr);
5298                         WDOORBELL64(ring->doorbell_index, ring->wptr);
5299                 } else {
5300                         BUG(); /* only DOORBELL method supported on gfx11 now */
5301                 }
5302         }
5303 }
5304
5305 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5306 {
5307         struct amdgpu_device *adev = ring->adev;
5308         u32 ref_and_mask, reg_mem_engine;
5309         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5310
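             /* Each compute ME owns a group of HDP-flush request bits
              * indexed by pipe; the gfx ring uses the cp0 bit and performs
              * the wait on the PFP engine instead.
              */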
5311         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5312                 switch (ring->me) {
5313                 case 1:
5314                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5315                         break;
5316                 case 2:
5317                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5318                         break;
5319                 default:
5320                         return;
5321                 }
5322                 reg_mem_engine = 0;
5323         } else {
5324                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5325                 reg_mem_engine = 1; /* pfp */
5326         }
5327
5328         gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5329                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5330                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5331                                ref_and_mask, ref_and_mask, 0x20);
5332 }
5333
5334 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5335                                        struct amdgpu_job *job,
5336                                        struct amdgpu_ib *ib,
5337                                        uint32_t flags)
5338 {
5339         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5340         u32 header, control = 0;
5341
5342         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5343
5344         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5345
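             /* The control word carries the IB size in dwords in the low
              * bits and the VMID starting at bit 24.
              */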
5346         control |= ib->length_dw | (vmid << 24);
5347
5348         if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5349                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5350
5351                 if (flags & AMDGPU_IB_PREEMPTED)
5352                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5353
5354                 if (vmid)
5355                         gfx_v11_0_ring_emit_de_meta(ring,
5356                                     !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
5357         }
5358
5359         if (ring->is_mes_queue)
5360                 /* inherit vmid from mqd */
5361                 control |= 0x400000;
5362
5363         amdgpu_ring_write(ring, header);
5364         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5365         amdgpu_ring_write(ring,
5366 #ifdef __BIG_ENDIAN
5367                 (2 << 0) |
5368 #endif
5369                 lower_32_bits(ib->gpu_addr));
5370         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5371         amdgpu_ring_write(ring, control);
5372 }
5373
5374 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5375                                            struct amdgpu_job *job,
5376                                            struct amdgpu_ib *ib,
5377                                            uint32_t flags)
5378 {
5379         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5380         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5381
5382         if (ring->is_mes_queue)
5383                 /* inherit vmid from mqd */
5384                 control |= 0x40000000;
5385
5386         /* Currently there is a high likelihood of a wave ID mismatch
5387          * between ME and GDS, leading to a HW deadlock, because ME generates
5388          * different wave IDs than the GDS expects. This situation happens
5389          * randomly when at least 5 compute pipes use GDS ordered append.
5390          * The wave IDs generated by ME are also wrong after suspend/resume.
5391          * Those are probably bugs somewhere else in the kernel driver.
5392          *
5393          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5394          * GDS to 0 for this ring (me/pipe).
5395          */
5396         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5397                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5398                 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5399                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5400         }
5401
5402         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5403         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5404         amdgpu_ring_write(ring,
5405 #ifdef __BIG_ENDIAN
5406                                 (2 << 0) |
5407 #endif
5408                                 lower_32_bits(ib->gpu_addr));
5409         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5410         amdgpu_ring_write(ring, control);
5411 }
5412
5413 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5414                                      u64 seq, unsigned flags)
5415 {
5416         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5417         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5418
5419         /* RELEASE_MEM - flush caches, send int */
5420         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5421         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5422                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
5423                                  PACKET3_RELEASE_MEM_GCR_GL2_INV |
5424                                  PACKET3_RELEASE_MEM_GCR_GL2_US |
5425                                  PACKET3_RELEASE_MEM_GCR_GL1_INV |
5426                                  PACKET3_RELEASE_MEM_GCR_GLV_INV |
5427                                  PACKET3_RELEASE_MEM_GCR_GLM_INV |
5428                                  PACKET3_RELEASE_MEM_GCR_GLM_WB |
5429                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5430                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5431                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
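             /* DATA_SEL 1 writes only the low 32 bits of seq, 2 writes the
              * full 64 bits; INT_SEL 2 raises an interrupt once the data
              * write is confirmed.
              */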
5432         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5433                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5434
5435         /*
5436          * the address must be Qword aligned for a 64bit write, and Dword
5437          * aligned when only the low 32 bits of data are sent (high discarded)
5438          */
5439         if (write64bit)
5440                 BUG_ON(addr & 0x7);
5441         else
5442                 BUG_ON(addr & 0x3);
5443         amdgpu_ring_write(ring, lower_32_bits(addr));
5444         amdgpu_ring_write(ring, upper_32_bits(addr));
5445         amdgpu_ring_write(ring, lower_32_bits(seq));
5446         amdgpu_ring_write(ring, upper_32_bits(seq));
5447         amdgpu_ring_write(ring, ring->is_mes_queue ?
5448                          (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5449 }
5450
5451 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5452 {
5453         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5454         uint32_t seq = ring->fence_drv.sync_seq;
5455         uint64_t addr = ring->fence_drv.gpu_addr;
5456
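             /* WAIT_REG_MEM on the fence address until the most recently
              * synced seq shows up in memory; the gfx ring waits on the PFP,
              * compute waits on the ME.
              */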
5457         gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5458                                upper_32_bits(addr), seq, 0xffffffff, 4);
5459 }
5460
5461 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5462                                    uint16_t pasid, uint32_t flush_type,
5463                                    bool all_hub, uint8_t dst_sel)
5464 {
5465         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5466         amdgpu_ring_write(ring,
5467                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5468                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5469                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5470                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5471 }
5472
5473 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5474                                          unsigned vmid, uint64_t pd_addr)
5475 {
5476         if (ring->is_mes_queue)
5477                 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5478         else
5479                 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5480
5481         /* compute doesn't have PFP */
5482         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5483                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5484                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5485                 amdgpu_ring_write(ring, 0x0);
5486         }
5487 }
5488
5489 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5490                                           u64 seq, unsigned int flags)
5491 {
5492         struct amdgpu_device *adev = ring->adev;
5493
5494         /* we only allocate 32bit for each seq wb address */
5495         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5496
5497         /* write fence seq to the "addr" */
5498         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5499         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5500                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5501         amdgpu_ring_write(ring, lower_32_bits(addr));
5502         amdgpu_ring_write(ring, upper_32_bits(addr));
5503         amdgpu_ring_write(ring, lower_32_bits(seq));
5504
5505         if (flags & AMDGPU_FENCE_FLAG_INT) {
5506                 /* set register to trigger INT */
5507                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5508                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5509                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5510                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5511                 amdgpu_ring_write(ring, 0);
5512                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5513         }
5514 }
5515
5516 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5517                                          uint32_t flags)
5518 {
5519         uint32_t dw2 = 0;
5520
5521         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5522         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5523                 /* set load_global_config & load_global_uconfig */
5524                 dw2 |= 0x8001;
5525                 /* set load_cs_sh_regs */
5526                 dw2 |= 0x01000000;
5527                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5528                 dw2 |= 0x10002;
5529         }
5530
5531         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5532         amdgpu_ring_write(ring, dw2);
5533         amdgpu_ring_write(ring, 0);
5534 }
5535
5536 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5537 {
5538         unsigned ret;
5539
5540         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5541         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5542         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5543         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5544         ret = ring->wptr & ring->buf_mask;
5545         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5546
5547         return ret;
5548 }
5549
5550 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5551 {
5552         unsigned cur;
5553         BUG_ON(offset > ring->buf_mask);
5554         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5555
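             /* Replace the 0x55aa55aa placeholder written by init_cond_exec
              * with the dword distance from the COND_EXEC packet to the
              * current write pointer, accounting for ring wraparound; e.g.
              * offset 10 and cur 20 yields a skip count of 10.
              */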
5556         cur = (ring->wptr - 1) & ring->buf_mask;
5557         if (likely(cur > offset))
5558                 ring->ring[offset] = cur - offset;
5559         else
5560                 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5561 }
5562
5563 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5564 {
5565         int i, r = 0;
5566         struct amdgpu_device *adev = ring->adev;
5567         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5568         struct amdgpu_ring *kiq_ring = &kiq->ring;
5569         unsigned long flags;
5570
5571         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5572                 return -EINVAL;
5573
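             /* Preemption sequence: force the ring's COND_EXEC condition
              * false so queued packets get skipped, have the KIQ preempt the
              * queue without unmapping it, then poll for the trailing fence
              * the CP writes back once preemption has completed.
              */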
5574         spin_lock_irqsave(&kiq->ring_lock, flags);
5575
5576         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5577                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5578                 return -ENOMEM;
5579         }
5580
5581         /* assert preemption condition */
5582         amdgpu_ring_set_preempt_cond_exec(ring, false);
5583
5584         /* assert IB preemption, emit the trailing fence */
5585         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5586                                    ring->trail_fence_gpu_addr,
5587                                    ++ring->trail_seq);
5588         amdgpu_ring_commit(kiq_ring);
5589
5590         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5591
5592         /* poll the trailing fence */
5593         for (i = 0; i < adev->usec_timeout; i++) {
5594                 if (ring->trail_seq ==
5595                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5596                         break;
5597                 udelay(1);
5598         }
5599
5600         if (i >= adev->usec_timeout) {
5601                 r = -EINVAL;
5602                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5603         }
5604
5605         /* deassert preemption condition */
5606         amdgpu_ring_set_preempt_cond_exec(ring, true);
5607         return r;
5608 }
5609
5610 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5611 {
5612         struct amdgpu_device *adev = ring->adev;
5613         struct v10_de_ib_state de_payload = {0};
5614         uint64_t offset, gds_addr, de_payload_gpu_addr;
5615         void *de_payload_cpu_addr;
5616         int cnt;
5617
5618         if (ring->is_mes_queue) {
5619                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5620                                   gfx[0].gfx_meta_data) +
5621                         offsetof(struct v10_gfx_meta_data, de_payload);
5622                 de_payload_gpu_addr =
5623                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5624                 de_payload_cpu_addr =
5625                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5626
5627                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5628                                   gfx[0].gds_backup) +
5629                         offsetof(struct v10_gfx_meta_data, de_payload);
5630                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5631         } else {
5632                 offset = offsetof(struct v10_gfx_meta_data, de_payload);
5633                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5634                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5635
5636                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5637                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5638                                  PAGE_SIZE);
5639         }
5640
5641         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5642         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5643
5644         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5645         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5646         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5647                                  WRITE_DATA_DST_SEL(8) |
5648                                  WR_CONFIRM) |
5649                                  WRITE_DATA_CACHE_POLICY(0));
5650         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5651         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5652
5653         if (resume)
5654                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5655                                            sizeof(de_payload) >> 2);
5656         else
5657                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5658                                            sizeof(de_payload) >> 2);
5659 }
5660
5661 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5662                                     bool secure)
5663 {
5664         uint32_t v = secure ? FRAME_TMZ : 0;
5665
5666         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5667         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5668 }
5669
5670 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5671                                      uint32_t reg_val_offs)
5672 {
5673         struct amdgpu_device *adev = ring->adev;
5674
5675         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5676         amdgpu_ring_write(ring, 0 |     /* src: register*/
5677                                 (5 << 8) |      /* dst: memory */
5678                                 (1 << 20));     /* write confirm */
5679         amdgpu_ring_write(ring, reg);
5680         amdgpu_ring_write(ring, 0);
5681         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5682                                 reg_val_offs * 4));
5683         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5684                                 reg_val_offs * 4));
5685 }
5686
5687 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5688                                    uint32_t val)
5689 {
5690         uint32_t cmd = 0;
5691
5692         switch (ring->funcs->type) {
5693         case AMDGPU_RING_TYPE_GFX:
5694                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5695                 break;
5696         case AMDGPU_RING_TYPE_KIQ:
5697                 cmd = (1 << 16); /* no inc addr */
5698                 break;
5699         default:
5700                 cmd = WR_CONFIRM;
5701                 break;
5702         }
5703         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5704         amdgpu_ring_write(ring, cmd);
5705         amdgpu_ring_write(ring, reg);
5706         amdgpu_ring_write(ring, 0);
5707         amdgpu_ring_write(ring, val);
5708 }
5709
5710 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5711                                         uint32_t val, uint32_t mask)
5712 {
5713         gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5714 }
5715
5716 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5717                                                    uint32_t reg0, uint32_t reg1,
5718                                                    uint32_t ref, uint32_t mask)
5719 {
5720         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5721
5722         gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5723                                ref, mask, 0x20);
5724 }
5725
5726 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5727                                          unsigned vmid)
5728 {
5729         struct amdgpu_device *adev = ring->adev;
5730         uint32_t value = 0;
5731
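             /* Issue an SQ "kill" command (CMD 0x03 is assumed to map to
              * SQ_IND_CMD_CMD_KILL) restricted to waves owned by the guilty
              * VMID.
              */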
5732         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5733         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5734         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5735         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5736         WREG32_SOC15(GC, 0, regSQ_CMD, value);
5737 }
5738
5739 static void
5740 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5741                                       uint32_t me, uint32_t pipe,
5742                                       enum amdgpu_interrupt_state state)
5743 {
5744         uint32_t cp_int_cntl, cp_int_cntl_reg;
5745
5746         if (!me) {
5747                 switch (pipe) {
5748                 case 0:
5749                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5750                         break;
5751                 case 1:
5752                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5753                         break;
5754                 default:
5755                         DRM_DEBUG("invalid pipe %d\n", pipe);
5756                         return;
5757                 }
5758         } else {
5759                 DRM_DEBUG("invalid me %d\n", me);
5760                 return;
5761         }
5762
5763         switch (state) {
5764         case AMDGPU_IRQ_STATE_DISABLE:
5765                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5766                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5767                                             TIME_STAMP_INT_ENABLE, 0);
5768                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5769                                             GENERIC0_INT_ENABLE, 0);
5770                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5771                 break;
5772         case AMDGPU_IRQ_STATE_ENABLE:
5773                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5774                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5775                                             TIME_STAMP_INT_ENABLE, 1);
5776                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5777                                             GENERIC0_INT_ENABLE, 1);
5778                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5779                 break;
5780         default:
5781                 break;
5782         }
5783 }
5784
5785 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5786                                                      int me, int pipe,
5787                                                      enum amdgpu_interrupt_state state)
5788 {
5789         u32 mec_int_cntl, mec_int_cntl_reg;
5790
5791         /*
5792          * amdgpu controls only the first MEC. That's why this function only
5793          * handles the setting of interrupts for this specific MEC. All other
5794          * pipes' interrupts are set by amdkfd.
5795          */
5796
5797         if (me == 1) {
5798                 switch (pipe) {
5799                 case 0:
5800                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5801                         break;
5802                 case 1:
5803                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5804                         break;
5805                 case 2:
5806                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5807                         break;
5808                 case 3:
5809                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5810                         break;
5811                 default:
5812                         DRM_DEBUG("invalid pipe %d\n", pipe);
5813                         return;
5814                 }
5815         } else {
5816                 DRM_DEBUG("invalid me %d\n", me);
5817                 return;
5818         }
5819
5820         switch (state) {
5821         case AMDGPU_IRQ_STATE_DISABLE:
5822                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5823                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5824                                              TIME_STAMP_INT_ENABLE, 0);
5825                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5826                                              GENERIC0_INT_ENABLE, 0);
5827                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5828                 break;
5829         case AMDGPU_IRQ_STATE_ENABLE:
5830                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5831                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5832                                              TIME_STAMP_INT_ENABLE, 1);
5833                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5834                                              GENERIC0_INT_ENABLE, 1);
5835                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5836                 break;
5837         default:
5838                 break;
5839         }
5840 }
5841
5842 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5843                                             struct amdgpu_irq_src *src,
5844                                             unsigned type,
5845                                             enum amdgpu_interrupt_state state)
5846 {
5847         switch (type) {
5848         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5849                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5850                 break;
5851         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5852                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5853                 break;
5854         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5855                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5856                 break;
5857         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5858                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5859                 break;
5860         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5861                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5862                 break;
5863         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5864                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5865                 break;
5866         default:
5867                 break;
5868         }
5869         return 0;
5870 }
5871
5872 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5873                              struct amdgpu_irq_src *source,
5874                              struct amdgpu_iv_entry *entry)
5875 {
5876         int i;
5877         u8 me_id, pipe_id, queue_id;
5878         struct amdgpu_ring *ring;
5879         uint32_t mes_queue_id = entry->src_data[0];
5880
5881         DRM_DEBUG("IH: CP EOP\n");
5882
5883         if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5884                 struct amdgpu_mes_queue *queue;
5885
5886                 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5887
5888                 spin_lock(&adev->mes.queue_id_lock);
5889                 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5890                 if (queue) {
5891                         DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5892                         amdgpu_fence_process(queue->ring);
5893                 }
5894                 spin_unlock(&adev->mes.queue_id_lock);
5895         } else {
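                     /* ring_id encodes the source: bits [3:2] = ME,
                      * bits [1:0] = pipe, bits [6:4] = queue; e.g. ring_id
                      * 0x14 decodes to me 1, pipe 0, queue 1.
                      */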
5896                 me_id = (entry->ring_id & 0x0c) >> 2;
5897                 pipe_id = (entry->ring_id & 0x03) >> 0;
5898                 queue_id = (entry->ring_id & 0x70) >> 4;
5899
5900                 switch (me_id) {
5901                 case 0:
5902                         if (pipe_id == 0)
5903                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5904                         else
5905                                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
5906                         break;
5907                 case 1:
5908                 case 2:
5909                         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5910                                 ring = &adev->gfx.compute_ring[i];
5911                                 /* Per-queue interrupt is supported for MEC starting from VI.
5912                                  * The interrupt can only be enabled/disabled per pipe instead
5913                                  * of per queue.
5914                                  */
5915                                 if ((ring->me == me_id) &&
5916                                     (ring->pipe == pipe_id) &&
5917                                     (ring->queue == queue_id))
5918                                         amdgpu_fence_process(ring);
5919                         }
5920                         break;
5921                 }
5922         }
5923
5924         return 0;
5925 }
5926
5927 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5928                                               struct amdgpu_irq_src *source,
5929                                               unsigned type,
5930                                               enum amdgpu_interrupt_state state)
5931 {
5932         switch (state) {
5933         case AMDGPU_IRQ_STATE_DISABLE:
5934         case AMDGPU_IRQ_STATE_ENABLE:
5935                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5936                                PRIV_REG_INT_ENABLE,
5937                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5938                 break;
5939         default:
5940                 break;
5941         }
5942
5943         return 0;
5944 }
5945
5946 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5947                                                struct amdgpu_irq_src *source,
5948                                                unsigned type,
5949                                                enum amdgpu_interrupt_state state)
5950 {
5951         switch (state) {
5952         case AMDGPU_IRQ_STATE_DISABLE:
5953         case AMDGPU_IRQ_STATE_ENABLE:
5954                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5955                                PRIV_INSTR_INT_ENABLE,
5956                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5957                 break;
5958         default:
5959                 break;
5960         }
5961
5962         return 0;
5963 }
5964
5965 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
5966                                         struct amdgpu_iv_entry *entry)
5967 {
5968         u8 me_id, pipe_id, queue_id;
5969         struct amdgpu_ring *ring;
5970         int i;
5971
5972         me_id = (entry->ring_id & 0x0c) >> 2;
5973         pipe_id = (entry->ring_id & 0x03) >> 0;
5974         queue_id = (entry->ring_id & 0x70) >> 4;
5975
5976         switch (me_id) {
5977         case 0:
5978                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5979                         ring = &adev->gfx.gfx_ring[i];
5980                         /* we only enable 1 gfx queue per pipe for now */
5981                         if (ring->me == me_id && ring->pipe == pipe_id)
5982                                 drm_sched_fault(&ring->sched);
5983                 }
5984                 break;
5985         case 1:
5986         case 2:
5987                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5988                         ring = &adev->gfx.compute_ring[i];
5989                         if (ring->me == me_id && ring->pipe == pipe_id &&
5990                             ring->queue == queue_id)
5991                                 drm_sched_fault(&ring->sched);
5992                 }
5993                 break;
5994         default:
5995                 BUG();
5996                 break;
5997         }
5998 }
5999
6000 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
6001                                   struct amdgpu_irq_src *source,
6002                                   struct amdgpu_iv_entry *entry)
6003 {
6004         DRM_ERROR("Illegal register access in command stream\n");
6005         gfx_v11_0_handle_priv_fault(adev, entry);
6006         return 0;
6007 }
6008
6009 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6010                                    struct amdgpu_irq_src *source,
6011                                    struct amdgpu_iv_entry *entry)
6012 {
6013         DRM_ERROR("Illegal instruction in command stream\n");
6014         gfx_v11_0_handle_priv_fault(adev, entry);
6015         return 0;
6016 }
6017
6018 #if 0
6019 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6020                                              struct amdgpu_irq_src *src,
6021                                              unsigned int type,
6022                                              enum amdgpu_interrupt_state state)
6023 {
6024         uint32_t tmp, target;
6025         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6026
6027         target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6028         target += ring->pipe;
6029
6030         switch (type) {
6031         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6032                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6033                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6034                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6035                                             GENERIC2_INT_ENABLE, 0);
6036                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6037
6038                         tmp = RREG32_SOC15_IP(GC, target);
6039                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6040                                             GENERIC2_INT_ENABLE, 0);
6041                         WREG32_SOC15_IP(GC, target, tmp);
6042                 } else {
6043                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6044                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6045                                             GENERIC2_INT_ENABLE, 1);
6046                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6047
6048                         tmp = RREG32_SOC15_IP(GC, target);
6049                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6050                                             GENERIC2_INT_ENABLE, 1);
6051                         WREG32_SOC15_IP(GC, target, tmp);
6052                 }
6053                 break;
6054         default:
6055                 BUG(); /* kiq only support GENERIC2_INT now */
6056                 break;
6057         }
6058         return 0;
6059 }
6060 #endif
6061
6062 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6063 {
6064         const unsigned int gcr_cntl =
6065                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6066                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6067                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6068                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6069                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6070                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6071                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6072                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6073
6074         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6075         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6076         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6077         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6078         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6079         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6080         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6081         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6082         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6083 }
6084
6085 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6086         .name = "gfx_v11_0",
6087         .early_init = gfx_v11_0_early_init,
6088         .late_init = gfx_v11_0_late_init,
6089         .sw_init = gfx_v11_0_sw_init,
6090         .sw_fini = gfx_v11_0_sw_fini,
6091         .hw_init = gfx_v11_0_hw_init,
6092         .hw_fini = gfx_v11_0_hw_fini,
6093         .suspend = gfx_v11_0_suspend,
6094         .resume = gfx_v11_0_resume,
6095         .is_idle = gfx_v11_0_is_idle,
6096         .wait_for_idle = gfx_v11_0_wait_for_idle,
6097         .soft_reset = gfx_v11_0_soft_reset,
6098         .check_soft_reset = gfx_v11_0_check_soft_reset,
6099         .set_clockgating_state = gfx_v11_0_set_clockgating_state,
6100         .set_powergating_state = gfx_v11_0_set_powergating_state,
6101         .get_clockgating_state = gfx_v11_0_get_clockgating_state,
6102 };
6103
6104 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6105         .type = AMDGPU_RING_TYPE_GFX,
6106         .align_mask = 0xff,
6107         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6108         .support_64bit_ptrs = true,
6109         .secure_submission_supported = true,
6110         .vmhub = AMDGPU_GFXHUB_0,
6111         .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6112         .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6113         .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6114         .emit_frame_size = /* totally 242 maximum if 16 IBs */
6115                 5 + /* COND_EXEC */
6116                 7 + /* PIPELINE_SYNC */
6117                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6118                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6119                 2 + /* VM_FLUSH */
6120                 8 + /* FENCE for VM_FLUSH */
6121                 20 + /* GDS switch */
6122                 5 + /* COND_EXEC */
6123                 7 + /* HDP_flush */
6124                 4 + /* VGT_flush */
6125                 31 + /* DE_META */
6126                 3 + /* CNTX_CTRL */
6127                 5 + /* HDP_INVL */
6128                 8 + 8 + /* FENCE x2 */
6129                 8, /* gfx_v11_0_emit_mem_sync */
6130         .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
6131         .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6132         .emit_fence = gfx_v11_0_ring_emit_fence,
6133         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6134         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6135         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6136         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6137         .test_ring = gfx_v11_0_ring_test_ring,
6138         .test_ib = gfx_v11_0_ring_test_ib,
6139         .insert_nop = amdgpu_ring_insert_nop,
6140         .pad_ib = amdgpu_ring_generic_pad_ib,
6141         .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6142         .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6143         .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6144         .preempt_ib = gfx_v11_0_ring_preempt_ib,
6145         .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6146         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6147         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6148         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6149         .soft_recovery = gfx_v11_0_ring_soft_recovery,
6150         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6151 };
6152
6153 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6154         .type = AMDGPU_RING_TYPE_COMPUTE,
6155         .align_mask = 0xff,
6156         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6157         .support_64bit_ptrs = true,
6158         .vmhub = AMDGPU_GFXHUB_0,
6159         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6160         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6161         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6162         .emit_frame_size =
6163                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6164                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6165                 5 + /* hdp invalidate */
6166                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6167                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6168                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6169                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6170                 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6171                 8, /* gfx_v11_0_emit_mem_sync */
6172         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6173         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6174         .emit_fence = gfx_v11_0_ring_emit_fence,
6175         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6176         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6177         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6178         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6179         .test_ring = gfx_v11_0_ring_test_ring,
6180         .test_ib = gfx_v11_0_ring_test_ib,
6181         .insert_nop = amdgpu_ring_insert_nop,
6182         .pad_ib = amdgpu_ring_generic_pad_ib,
6183         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6184         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6185         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6186         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6187 };
6188
6189 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6190         .type = AMDGPU_RING_TYPE_KIQ,
6191         .align_mask = 0xff,
6192         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6193         .support_64bit_ptrs = true,
6194         .vmhub = AMDGPU_GFXHUB_0,
6195         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6196         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6197         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6198         .emit_frame_size =
6199                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6200                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6201                 5 + /* hdp invalidate */
6202                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6203                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6204                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6205                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6206                 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6207         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6208         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6209         .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6210         .test_ring = gfx_v11_0_ring_test_ring,
6211         .test_ib = gfx_v11_0_ring_test_ib,
6212         .insert_nop = amdgpu_ring_insert_nop,
6213         .pad_ib = amdgpu_ring_generic_pad_ib,
6214         .emit_rreg = gfx_v11_0_ring_emit_rreg,
6215         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6216         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6217         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6218 };
6219
6220 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6221 {
6222         int i;
6223
6224         adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6225
6226         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6227                 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6228
6229         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6230                 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6231 }
6232
6233 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6234         .set = gfx_v11_0_set_eop_interrupt_state,
6235         .process = gfx_v11_0_eop_irq,
6236 };
6237
6238 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6239         .set = gfx_v11_0_set_priv_reg_fault_state,
6240         .process = gfx_v11_0_priv_reg_irq,
6241 };
6242
6243 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6244         .set = gfx_v11_0_set_priv_inst_fault_state,
6245         .process = gfx_v11_0_priv_inst_irq,
6246 };
6247
6248 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6249 {
6250         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6251         adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6252
6253         adev->gfx.priv_reg_irq.num_types = 1;
6254         adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6255
6256         adev->gfx.priv_inst_irq.num_types = 1;
6257         adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6258 }
6259
6260 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6261 {
6262         if (adev->flags & AMD_IS_APU)
6263                 adev->gfx.imu.mode = MISSION_MODE;
6264         else
6265                 adev->gfx.imu.mode = DEBUG_MODE;
6266
6267         adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6268 }
6269
6270 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6271 {
6272         adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6273 }
6274
6275 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6276 {
6277         unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6278                             adev->gfx.config.max_sh_per_se *
6279                             adev->gfx.config.max_shader_engines;
6280
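             /* 4KB of GDS; one wave ID slot per wave, assuming up to 32
              * waves in flight per CU across the whole chip.
              */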
6281         adev->gds.gds_size = 0x1000;
6282         adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6283         adev->gds.gws_size = 64;
6284         adev->gds.oa_size = 16;
6285 }
6286
6287 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6288 {
6289         /* set gfx eng mqd */
6290         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6291                 sizeof(struct v11_gfx_mqd);
6292         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6293                 gfx_v11_0_gfx_mqd_init;
6294         /* set compute eng mqd */
6295         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6296                 sizeof(struct v11_compute_mqd);
6297         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6298                 gfx_v11_0_compute_mqd_init;
6299 }
6300
6301 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6302                                                           u32 bitmap)
6303 {
6304         u32 data;
6305
6306         if (!bitmap)
6307                 return;
6308
6309         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6310         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6311
6312         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6313 }
6314
6315 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6316 {
6317         u32 data, wgp_bitmask;
6318         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6319         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
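             /* Bits set in either the fuse (CC) or user (GC_USER) registers
              * mark inactive WGPs; the active set is the complement within a
              * mask of max_cu_per_sh / 2 possible WGPs per SH.
              */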
6320
6321         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6322         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6323
6324         wgp_bitmask =
6325                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6326
6327         return (~data) & wgp_bitmask;
6328 }
6329
6330 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6331 {
6332         u32 wgp_idx, wgp_active_bitmap;
6333         u32 cu_bitmap_per_wgp, cu_active_bitmap;
6334
6335         wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6336         cu_active_bitmap = 0;
6337
6338         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
6339                 /* if there is one WGP enabled, it means 2 CUs will be enabled */
6340                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6341                 if (wgp_active_bitmap & (1 << wgp_idx))
6342                         cu_active_bitmap |= cu_bitmap_per_wgp;
6343         }
6344
6345         return cu_active_bitmap;
6346 }
6347
6348 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6349                                  struct amdgpu_cu_info *cu_info)
6350 {
6351         int i, j, k, counter, active_cu_number = 0;
6352         u32 mask, bitmap;
6353         unsigned disable_masks[8 * 2];
6354
6355         if (!adev || !cu_info)
6356                 return -EINVAL;
6357
6358         amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6359
6360         mutex_lock(&adev->grbm_idx_mutex);
6361         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6362                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6363                         mask = 1;
6364                         counter = 0;
6365                         gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
6366                         if (i < 8 && j < 2)
6367                                 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6368                                         adev, disable_masks[i * 2 + j]);
6369                         bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6370
6371                         /**
6372                          * GFX11 could support more than 4 SEs, while the bitmap
6373                          * in cu_info struct is 4x4 and ioctl interface struct
6374                          * drm_amdgpu_info_device should keep stable.
6375                          * So we use last two columns of bitmap to store cu mask for
6376                          * SEs 4 to 7, the layout of the bitmap is as below:
6377                          *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6378                          *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6379                          *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6380                          *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6381                          *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6382                          *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6383                          *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6384                          *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6385                          */
6386                         cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
6387
6388                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6389                                 if (bitmap & mask)
6390                                         counter++;
6391
6392                                 mask <<= 1;
6393                         }
6394                         active_cu_number += counter;
6395                 }
6396         }
6397         gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6398         mutex_unlock(&adev->grbm_idx_mutex);
6399
6400         cu_info->number = active_cu_number;
6401         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6402
6403         return 0;
6404 }
6405
6406 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6407 {
6408         .type = AMD_IP_BLOCK_TYPE_GFX,
6409         .major = 11,
6410         .minor = 0,
6411         .rev = 0,
6412         .funcs = &gfx_v11_0_ip_funcs,
6413 };