drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "gfx_v11_0_3.h"
50 #include "nbio_v4_3.h"
51 #include "mes_v11_0.h"
52
53 #define GFX11_NUM_GFX_RINGS             1
54 #define GFX11_MEC_HPD_SIZE      2048
55
56 #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
57 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1        0x1388
58
59 #define regCGTT_WD_CLK_CTRL             0x5086
60 #define regCGTT_WD_CLK_CTRL_BASE_IDX    1
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1   0x4e7e
62 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX  1
63 #define regPC_CONFIG_CNTL_1             0x194d
64 #define regPC_CONFIG_CNTL_1_BASE_IDX    1
65
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
80 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
81 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
82 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
83 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
85 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
86 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
87 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
88 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
89 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
90 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
91
92 static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
93         SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
94 };
95
96 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
97 {
98         SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
99         SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
100         SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
101         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
102         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
103         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
104         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
105         SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
106         SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
107 };
108
109 static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = {
110         SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800),
111         SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
112         SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
113         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
114         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
115         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f),
116         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188),
117         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007),
118         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007),
119         SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000),
120         SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
121         SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000),
122         SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a),
123         SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f)
124 };
125
126 #define DEFAULT_SH_MEM_CONFIG \
127         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
128          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
129          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
130
131 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
132 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
133 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
134 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
135 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
136 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
137 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
138 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
139                                  struct amdgpu_cu_info *cu_info);
140 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
141 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
142                                    u32 sh_num, u32 instance, int xcc_id);
143 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
144
145 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
146 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
147 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
148                                      uint32_t val);
149 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
150 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
151                                            uint16_t pasid, uint32_t flush_type,
152                                            bool all_hub, uint8_t dst_sel);
153 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
154 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
155 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
156                                       bool enable);
157
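/*
 * Hand the queues described by @queue_mask over to the KIQ with a
 * PACKET3_SET_RESOURCES packet; the GWS, OAC and GDS fields are left at 0.
 */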
158 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
159 {
160         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
161         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
162                           PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
163                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
164         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
165         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
166         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
167         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
168         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
169         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
170 }
171
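/*
 * Emit a PACKET3_MAP_QUEUES packet on the KIQ ring to map @ring's MQD and
 * write pointer into the hardware slot selected by its me/pipe/queue and
 * doorbell.  The engine select encodes the ring type (compute, gfx or MES).
 */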
172 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
173                                  struct amdgpu_ring *ring)
174 {
175         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
176         uint64_t wptr_addr = ring->wptr_gpu_addr;
177         uint32_t me = 0, eng_sel = 0;
178
179         switch (ring->funcs->type) {
180         case AMDGPU_RING_TYPE_COMPUTE:
181                 me = 1;
182                 eng_sel = 0;
183                 break;
184         case AMDGPU_RING_TYPE_GFX:
185                 me = 0;
186                 eng_sel = 4;
187                 break;
188         case AMDGPU_RING_TYPE_MES:
189                 me = 2;
190                 eng_sel = 5;
191                 break;
192         default:
193                 WARN_ON(1);
194         }
195
196         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
197         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
198         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
199                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
200                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
201                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
202                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
203                           PACKET3_MAP_QUEUES_ME((me)) |
204                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
205                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
206                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
207                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
208         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
209         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
210         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
211         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
212         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
213 }
214
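/*
 * Unmap (or preempt, depending on @action) @ring via the KIQ.  If MES is
 * enabled and the KIQ ring is not ready yet, the request is routed through
 * the MES legacy queue interface instead.
 */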
215 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
216                                    struct amdgpu_ring *ring,
217                                    enum amdgpu_unmap_queues_action action,
218                                    u64 gpu_addr, u64 seq)
219 {
220         struct amdgpu_device *adev = kiq_ring->adev;
221         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
222
223         if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
224                 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
225                 return;
226         }
227
228         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
229         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
230                           PACKET3_UNMAP_QUEUES_ACTION(action) |
231                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
232                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
233                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
234         amdgpu_ring_write(kiq_ring,
235                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
236
237         if (action == PREEMPT_QUEUES_NO_UNMAP) {
238                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
239                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
240                 amdgpu_ring_write(kiq_ring, seq);
241         } else {
242                 amdgpu_ring_write(kiq_ring, 0);
243                 amdgpu_ring_write(kiq_ring, 0);
244                 amdgpu_ring_write(kiq_ring, 0);
245         }
246 }
247
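/*
 * Emit a PACKET3_QUERY_STATUS packet targeting @ring's doorbell; @addr and
 * @seq form the fence the CP signals once the query has been processed.
 */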
248 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
249                                    struct amdgpu_ring *ring,
250                                    u64 addr,
251                                    u64 seq)
252 {
253         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
254
255         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
256         amdgpu_ring_write(kiq_ring,
257                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
258                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
259                           PACKET3_QUERY_STATUS_COMMAND(2));
260         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
261                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
262                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
263         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
264         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
265         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
266         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
267 }
268
269 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
270                                 uint16_t pasid, uint32_t flush_type,
271                                 bool all_hub)
272 {
273         gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
274 }
275
276 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
277         .kiq_set_resources = gfx11_kiq_set_resources,
278         .kiq_map_queues = gfx11_kiq_map_queues,
279         .kiq_unmap_queues = gfx11_kiq_unmap_queues,
280         .kiq_query_status = gfx11_kiq_query_status,
281         .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
282         .set_resources_size = 8,
283         .map_queues_size = 7,
284         .unmap_queues_size = 6,
285         .query_status_size = 7,
286         .invalidate_tlbs_size = 2,
287 };
288
289 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
290 {
291         adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
292 }
293
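/*
 * Program the "golden" register settings for the detected GC revision and
 * then the settings common to all gfx11 parts.
 */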
294 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
295 {
296         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
297         case IP_VERSION(11, 0, 1):
298         case IP_VERSION(11, 0, 4):
299                 soc15_program_register_sequence(adev,
300                                                 golden_settings_gc_11_0_1,
301                                                 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
302                 break;
303         case IP_VERSION(11, 5, 0):
304                 soc15_program_register_sequence(adev,
305                                                 golden_settings_gc_11_5_0,
306                                                 (const u32)ARRAY_SIZE(golden_settings_gc_11_5_0));
307                 break;
308         default:
309                 break;
310         }
311         soc15_program_register_sequence(adev,
312                                         golden_settings_gc_11_0,
313                                         (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
314
315 }
316
317 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
318                                        bool wc, uint32_t reg, uint32_t val)
319 {
320         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
321         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
322                           WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
323         amdgpu_ring_write(ring, reg);
324         amdgpu_ring_write(ring, 0);
325         amdgpu_ring_write(ring, val);
326 }
327
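/*
 * Emit a WAIT_REG_MEM packet that polls a register or memory location
 * (selected by @mem_space) until (value & @mask) == @ref, rechecking every
 * @inv wait intervals.
 */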
328 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
329                                   int mem_space, int opt, uint32_t addr0,
330                                   uint32_t addr1, uint32_t ref, uint32_t mask,
331                                   uint32_t inv)
332 {
333         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
334         amdgpu_ring_write(ring,
335                           /* memory (1) or register (0) */
336                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
337                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
338                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
339                            WAIT_REG_MEM_ENGINE(eng_sel)));
340
341         if (mem_space)
342                 BUG_ON(addr0 & 0x3); /* Dword align */
343         amdgpu_ring_write(ring, addr0);
344         amdgpu_ring_write(ring, addr1);
345         amdgpu_ring_write(ring, ref);
346         amdgpu_ring_write(ring, mask);
347         amdgpu_ring_write(ring, inv); /* poll interval */
348 }
349
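/*
 * Basic ring test: seed SCRATCH_REG0 with a dummy value, write 0xDEADBEEF to
 * it through the ring, and poll until the new value reads back or the test
 * times out.
 */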
350 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
351 {
352         struct amdgpu_device *adev = ring->adev;
353         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
354         uint32_t tmp = 0;
355         unsigned i;
356         int r;
357
358         WREG32(scratch, 0xCAFEDEAD);
359         r = amdgpu_ring_alloc(ring, 5);
360         if (r) {
361                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
362                           ring->idx, r);
363                 return r;
364         }
365
366         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
367                 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
368         } else {
369                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
370                 amdgpu_ring_write(ring, scratch -
371                                   PACKET3_SET_UCONFIG_REG_START);
372                 amdgpu_ring_write(ring, 0xDEADBEEF);
373         }
374         amdgpu_ring_commit(ring);
375
376         for (i = 0; i < adev->usec_timeout; i++) {
377                 tmp = RREG32(scratch);
378                 if (tmp == 0xDEADBEEF)
379                         break;
380                 if (amdgpu_emu_mode == 1)
381                         msleep(1);
382                 else
383                         udelay(1);
384         }
385
386         if (i >= adev->usec_timeout)
387                 r = -ETIMEDOUT;
388         return r;
389 }
390
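/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot (or the MES context buffer) and verify that the value
 * lands before the fence timeout expires.
 */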
391 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
392 {
393         struct amdgpu_device *adev = ring->adev;
394         struct amdgpu_ib ib;
395         struct dma_fence *f = NULL;
396         unsigned index;
397         uint64_t gpu_addr;
398         volatile uint32_t *cpu_ptr;
399         long r;
400
 401         /* MES KIQ fw doesn't have indirect buffer support for now */
402         if (adev->enable_mes_kiq &&
403             ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
404                 return 0;
405
406         memset(&ib, 0, sizeof(ib));
407
408         if (ring->is_mes_queue) {
409                 uint32_t padding, offset;
410
411                 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
412                 padding = amdgpu_mes_ctx_get_offs(ring,
413                                                   AMDGPU_MES_CTX_PADDING_OFFS);
414
415                 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
416                 ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
417
418                 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
419                 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
420                 *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
421         } else {
422                 r = amdgpu_device_wb_get(adev, &index);
423                 if (r)
424                         return r;
425
426                 gpu_addr = adev->wb.gpu_addr + (index * 4);
427                 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
428                 cpu_ptr = &adev->wb.wb[index];
429
430                 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
431                 if (r) {
432                         DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
433                         goto err1;
434                 }
435         }
436
437         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
438         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
439         ib.ptr[2] = lower_32_bits(gpu_addr);
440         ib.ptr[3] = upper_32_bits(gpu_addr);
441         ib.ptr[4] = 0xDEADBEEF;
442         ib.length_dw = 5;
443
444         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
445         if (r)
446                 goto err2;
447
448         r = dma_fence_wait_timeout(f, false, timeout);
449         if (r == 0) {
450                 r = -ETIMEDOUT;
451                 goto err2;
452         } else if (r < 0) {
453                 goto err2;
454         }
455
456         if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
457                 r = 0;
458         else
459                 r = -EINVAL;
460 err2:
461         if (!ring->is_mes_queue)
462                 amdgpu_ib_free(adev, &ib, NULL);
463         dma_fence_put(f);
464 err1:
465         if (!ring->is_mes_queue)
466                 amdgpu_device_wb_free(adev, index);
467         return r;
468 }
469
470 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
471 {
472         amdgpu_ucode_release(&adev->gfx.pfp_fw);
473         amdgpu_ucode_release(&adev->gfx.me_fw);
474         amdgpu_ucode_release(&adev->gfx.rlc_fw);
475         amdgpu_ucode_release(&adev->gfx.mec_fw);
476
477         kfree(adev->gfx.rlc.register_list_format);
478 }
479
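/*
 * Fetch the PSP table-of-contents firmware used by the RLC backdoor
 * autoload path.
 */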
480 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
481 {
482         const struct psp_firmware_header_v1_0 *toc_hdr;
483         int err = 0;
484         char fw_name[40];
485
486         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
487         err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
488         if (err)
489                 goto out;
490
491         toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
492         adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
493         adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
494         adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
495         adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
496                                 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
497         return 0;
498 out:
499         amdgpu_ucode_release(&adev->psp.toc_fw);
500         return err;
501 }
502
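/*
 * The CP gfx shadow feature (FW-assisted save/restore of gfx queue state)
 * needs sufficiently new ME/PFP/MEC firmware and is currently only exposed
 * when running as an SR-IOV VF on gfx 11.0.0/11.0.2/11.0.3.
 */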
503 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
504 {
505         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
506         case IP_VERSION(11, 0, 0):
507         case IP_VERSION(11, 0, 2):
508         case IP_VERSION(11, 0, 3):
509                 if ((adev->gfx.me_fw_version >= 1505) &&
510                     (adev->gfx.pfp_fw_version >= 1600) &&
511                     (adev->gfx.mec_fw_version >= 512)) {
512                         if (amdgpu_sriov_vf(adev))
513                                 adev->gfx.cp_gfx_shadow = true;
514                         else
515                                 adev->gfx.cp_gfx_shadow = false;
516                 }
517                 break;
518         default:
519                 adev->gfx.cp_gfx_shadow = false;
520                 break;
521         }
522 }
523
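/*
 * Request and parse the PFP, ME, RLC and MEC firmware images.  The PFP
 * header version also decides whether the RS64 CP microcode layout is used,
 * and IMU firmware is initialized through the IMU callbacks when present.
 */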
524 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
525 {
526         char fw_name[40];
527         char ucode_prefix[30];
528         int err;
529         const struct rlc_firmware_header_v2_0 *rlc_hdr;
530         uint16_t version_major;
531         uint16_t version_minor;
532
533         DRM_DEBUG("\n");
534
535         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
536
537         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
538         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
539         if (err)
540                 goto out;
 541         /* check the pfp fw hdr version to decide whether to enable rs64 for gfx11. */
542         adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
543                                 (union amdgpu_firmware_header *)
544                                 adev->gfx.pfp_fw->data, 2, 0);
545         if (adev->gfx.rs64_enable) {
546                 dev_info(adev->dev, "CP RS64 enable\n");
547                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
548                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
549                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
550         } else {
551                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
552         }
553
554         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
555         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
556         if (err)
557                 goto out;
558         if (adev->gfx.rs64_enable) {
559                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
560                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
561                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
562         } else {
563                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
564         }
565
566         if (!amdgpu_sriov_vf(adev)) {
567                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
568                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
569                 if (err)
570                         goto out;
571                 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
572                 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
573                 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
574                 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
575                 if (err)
576                         goto out;
577         }
578
579         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
580         err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
581         if (err)
582                 goto out;
583         if (adev->gfx.rs64_enable) {
584                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
585                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
586                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
587                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
588                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
589         } else {
590                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
591                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
592         }
593
594         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
595                 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
596
597         /* only one MEC for gfx 11.0.0. */
598         adev->gfx.mec2_fw = NULL;
599
600         gfx_v11_0_check_fw_cp_gfx_shadow(adev);
601
602         if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
603                 err = adev->gfx.imu.funcs->init_microcode(adev);
604                 if (err)
605                         DRM_ERROR("Failed to init imu firmware!\n");
606                 return err;
607         }
608
609 out:
610         if (err) {
611                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
612                 amdgpu_ucode_release(&adev->gfx.me_fw);
613                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
614                 amdgpu_ucode_release(&adev->gfx.mec_fw);
615         }
616
617         return err;
618 }
619
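/* Size, in dwords, of the clear-state indirect buffer built from gfx11_cs_data. */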
620 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
621 {
622         u32 count = 0;
623         const struct cs_section_def *sect = NULL;
624         const struct cs_extent_def *ext = NULL;
625
626         /* begin clear state */
627         count += 2;
628         /* context control state */
629         count += 3;
630
631         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
632                 for (ext = sect->section; ext->extent != NULL; ++ext) {
633                         if (sect->id == SECT_CONTEXT)
634                                 count += 2 + ext->reg_count;
635                         else
636                                 return 0;
637                 }
638         }
639
640         /* set PA_SC_TILE_STEERING_OVERRIDE */
641         count += 3;
642         /* end clear state */
643         count += 2;
644         /* clear state */
645         count += 2;
646
647         return count;
648 }
649
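/*
 * Build the clear-state indirect buffer: preamble begin, context control,
 * the SECT_CONTEXT register extents, PA_SC_TILE_STEERING_OVERRIDE, preamble
 * end and a trailing CLEAR_STATE packet.
 */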
650 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
651                                     volatile u32 *buffer)
652 {
653         u32 count = 0, i;
654         const struct cs_section_def *sect = NULL;
655         const struct cs_extent_def *ext = NULL;
656         int ctx_reg_offset;
657
658         if (adev->gfx.rlc.cs_data == NULL)
659                 return;
660         if (buffer == NULL)
661                 return;
662
663         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
664         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
665
666         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
667         buffer[count++] = cpu_to_le32(0x80000000);
668         buffer[count++] = cpu_to_le32(0x80000000);
669
670         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
671                 for (ext = sect->section; ext->extent != NULL; ++ext) {
672                         if (sect->id == SECT_CONTEXT) {
673                                 buffer[count++] =
674                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
675                                 buffer[count++] = cpu_to_le32(ext->reg_index -
676                                                 PACKET3_SET_CONTEXT_REG_START);
677                                 for (i = 0; i < ext->reg_count; i++)
678                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
679                         } else {
680                                 return;
681                         }
682                 }
683         }
684
685         ctx_reg_offset =
686                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
687         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
688         buffer[count++] = cpu_to_le32(ctx_reg_offset);
689         buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
690
691         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
692         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
693
694         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
695         buffer[count++] = cpu_to_le32(0);
696 }
697
698 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
699 {
700         /* clear state block */
701         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
702                         &adev->gfx.rlc.clear_state_gpu_addr,
703                         (void **)&adev->gfx.rlc.cs_ptr);
704
705         /* jump table block */
706         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
707                         &adev->gfx.rlc.cp_table_gpu_addr,
708                         (void **)&adev->gfx.rlc.cp_table_ptr);
709 }
710
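/*
 * Record the scratch and GRBM control registers used by the RLCG indirect
 * register access helpers.
 */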
711 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
712 {
713         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
714
715         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
716         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
717         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
718         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
719         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
720         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
721         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
722         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
723         adev->gfx.rlc.rlcg_reg_access_supported = true;
724 }
725
726 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
727 {
728         const struct cs_section_def *cs_data;
729         int r;
730
731         adev->gfx.rlc.cs_data = gfx11_cs_data;
732
733         cs_data = adev->gfx.rlc.cs_data;
734
735         if (cs_data) {
736                 /* init clear state block */
737                 r = amdgpu_gfx_rlc_init_csb(adev);
738                 if (r)
739                         return r;
740         }
741
742         /* init spm vmid with 0xf */
743         if (adev->gfx.rlc.funcs->update_spm_vmid)
744                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
745
746         return 0;
747 }
748
749 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
750 {
751         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
752         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
753         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
754 }
755
756 static void gfx_v11_0_me_init(struct amdgpu_device *adev)
757 {
758         bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
759
760         amdgpu_gfx_graphics_queue_acquire(adev);
761 }
762
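/*
 * Take ownership of the compute queues and allocate one GFX11_MEC_HPD_SIZE
 * EOP buffer slot per compute ring.
 */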
763 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
764 {
765         int r;
766         u32 *hpd;
767         size_t mec_hpd_size;
768
769         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
770
771         /* take ownership of the relevant compute queues */
772         amdgpu_gfx_compute_queue_acquire(adev);
773         mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
774
775         if (mec_hpd_size) {
776                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
777                                               AMDGPU_GEM_DOMAIN_GTT,
778                                               &adev->gfx.mec.hpd_eop_obj,
779                                               &adev->gfx.mec.hpd_eop_gpu_addr,
780                                               (void **)&hpd);
781                 if (r) {
 782                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
783                         gfx_v11_0_mec_fini(adev);
784                         return r;
785                 }
786
787                 memset(hpd, 0, mec_hpd_size);
788
789                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
790                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
791         }
792
793         return 0;
794 }
795
796 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
797 {
798         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
799                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
800                 (address << SQ_IND_INDEX__INDEX__SHIFT));
801         return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
802 }
803
804 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
805                            uint32_t thread, uint32_t regno,
806                            uint32_t num, uint32_t *out)
807 {
808         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
809                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
810                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
811                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
812                 (SQ_IND_INDEX__AUTO_INCR_MASK));
813         while (num--)
814                 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
815 }
816
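/*
 * Snapshot the per-wave status registers (PC, EXEC, HW_ID, GPR/LDS
 * allocation, trap and instruction status, M0, MODE) through the SQ
 * indirect register interface; used by the wave debug facilities.
 */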
817 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
818 {
819         /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
820          * field when performing a select_se_sh so it should be
821          * zero here */
822         WARN_ON(simd != 0);
823
824         /* type 3 wave data */
825         dst[(*no_fields)++] = 3;
826         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
827         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
828         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
829         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
830         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
831         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
832         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
833         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
834         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
835         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
836         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
837         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
838         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
839         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
840         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
841 }
842
843 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
844                                      uint32_t wave, uint32_t start,
845                                      uint32_t size, uint32_t *dst)
846 {
847         WARN_ON(simd != 0);
848
849         wave_read_regs(
850                 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
851                 dst);
852 }
853
854 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
855                                       uint32_t wave, uint32_t thread,
856                                       uint32_t start, uint32_t size,
857                                       uint32_t *dst)
858 {
859         wave_read_regs(
860                 adev, wave, thread,
861                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
862 }
863
864 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
865                                         u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
866 {
867         soc21_grbm_select(adev, me, pipe, q, vm);
868 }
869
870 /* all sizes are in bytes */
871 #define MQD_SHADOW_BASE_SIZE      73728
872 #define MQD_SHADOW_BASE_ALIGNMENT 256
873 #define MQD_FWWORKAREA_SIZE       484
874 #define MQD_FWWORKAREA_ALIGNMENT  256
875
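/*
 * Report the shadow and CSA (FW work area) buffer sizes and alignments that
 * userspace must honour for FW-managed gfx queue state; returns -ENOTSUPP
 * when the CP gfx shadow feature is unavailable.
 */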
876 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
877                                          struct amdgpu_gfx_shadow_info *shadow_info)
878 {
879         if (adev->gfx.cp_gfx_shadow) {
880                 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
881                 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
882                 shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
883                 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
884                 return 0;
885         } else {
886                 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
887                 return -ENOTSUPP;
888         }
889 }
890
891 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
892         .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
893         .select_se_sh = &gfx_v11_0_select_se_sh,
894         .read_wave_data = &gfx_v11_0_read_wave_data,
895         .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
896         .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
897         .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
898         .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
899         .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
900 };
901
902 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
903 {
904         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
905         case IP_VERSION(11, 0, 0):
906         case IP_VERSION(11, 0, 2):
907                 adev->gfx.config.max_hw_contexts = 8;
908                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
909                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
910                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
911                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
912                 break;
913         case IP_VERSION(11, 0, 3):
914                 adev->gfx.ras = &gfx_v11_0_3_ras;
915                 adev->gfx.config.max_hw_contexts = 8;
916                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
917                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
918                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
919                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
920                 break;
921         case IP_VERSION(11, 0, 1):
922         case IP_VERSION(11, 0, 4):
923         case IP_VERSION(11, 5, 0):
924                 adev->gfx.config.max_hw_contexts = 8;
925                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
926                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
927                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
928                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
929                 break;
930         default:
931                 BUG();
932                 break;
933         }
934
935         return 0;
936 }
937
938 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
939                                    int me, int pipe, int queue)
940 {
941         int r;
942         struct amdgpu_ring *ring;
943         unsigned int irq_type;
944
945         ring = &adev->gfx.gfx_ring[ring_id];
946
947         ring->me = me;
948         ring->pipe = pipe;
949         ring->queue = queue;
950
951         ring->ring_obj = NULL;
952         ring->use_doorbell = true;
953
954         if (!ring_id)
955                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
956         else
957                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
958         ring->vm_hub = AMDGPU_GFXHUB(0);
959         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
960
961         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
962         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
963                              AMDGPU_RING_PRIO_DEFAULT, NULL);
964         if (r)
965                 return r;
966         return 0;
967 }
968
969 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
970                                        int mec, int pipe, int queue)
971 {
972         int r;
973         unsigned irq_type;
974         struct amdgpu_ring *ring;
975         unsigned int hw_prio;
976
977         ring = &adev->gfx.compute_ring[ring_id];
978
979         /* mec0 is me1 */
980         ring->me = mec + 1;
981         ring->pipe = pipe;
982         ring->queue = queue;
983
984         ring->ring_obj = NULL;
985         ring->use_doorbell = true;
986         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
987         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
988                                 + (ring_id * GFX11_MEC_HPD_SIZE);
989         ring->vm_hub = AMDGPU_GFXHUB(0);
990         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
991
992         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
993                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
994                 + ring->pipe;
995         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
996                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
997         /* type-2 packets are deprecated on MEC, use type-3 instead */
998         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
999                              hw_prio, NULL);
1000         if (r)
1001                 return r;
1002
1003         return 0;
1004 }
1005
1006 static struct {
1007         SOC21_FIRMWARE_ID       id;
1008         unsigned int            offset;
1009         unsigned int            size;
1010 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
1011
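/*
 * Walk the RLC table of contents and record each firmware's offset and size
 * (stored in dwords in the TOC) for laying out the backdoor autoload buffer.
 */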
1012 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
1013 {
1014         RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
1015
1016         while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
1017                         (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
1018                 rlc_autoload_info[ucode->id].id = ucode->id;
1019                 rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
1020                 rlc_autoload_info[ucode->id].size = ucode->size * 4;
1021
1022                 ucode++;
1023         }
1024 }
1025
1026 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
1027 {
1028         uint32_t total_size = 0;
1029         SOC21_FIRMWARE_ID id;
1030
1031         gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
1032
1033         for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
1034                 total_size += rlc_autoload_info[id].size;
1035
 1036         /* TOC offsets may include alignment padding, so the plain sum of sizes can undercount; extend total_size to the end of the last entry */
1037         if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1038                 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1039                         rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1040
1041         return total_size;
1042 }
1043
1044 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1045 {
1046         int r;
1047         uint32_t total_size;
1048
1049         total_size = gfx_v11_0_calc_toc_total_size(adev);
1050
1051         r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1052                                       AMDGPU_GEM_DOMAIN_VRAM |
1053                                       AMDGPU_GEM_DOMAIN_GTT,
1054                                       &adev->gfx.rlc.rlc_autoload_bo,
1055                                       &adev->gfx.rlc.rlc_autoload_gpu_addr,
1056                                       (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1057
1058         if (r) {
1059                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1060                 return r;
1061         }
1062
1063         return 0;
1064 }
1065
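/*
 * Copy one firmware image into its slot in the autoload buffer, zero any
 * space left in the slot, and flag it in the autoload mask (the RS64 PFP
 * and ME entries are not flagged).
 */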
1066 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1067                                               SOC21_FIRMWARE_ID id,
1068                                               const void *fw_data,
1069                                               uint32_t fw_size,
1070                                               uint32_t *fw_autoload_mask)
1071 {
1072         uint32_t toc_offset;
1073         uint32_t toc_fw_size;
1074         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1075
1076         if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1077                 return;
1078
1079         toc_offset = rlc_autoload_info[id].offset;
1080         toc_fw_size = rlc_autoload_info[id].size;
1081
1082         if (fw_size == 0)
1083                 fw_size = toc_fw_size;
1084
1085         if (fw_size > toc_fw_size)
1086                 fw_size = toc_fw_size;
1087
1088         memcpy(ptr + toc_offset, fw_data, fw_size);
1089
1090         if (fw_size < toc_fw_size)
1091                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1092
1093         if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1094                 *(uint64_t *)fw_autoload_mask |= 1ULL << id;
1095 }
1096
1097 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1098                                                         uint32_t *fw_autoload_mask)
1099 {
1100         void *data;
1101         uint32_t size;
1102         uint64_t *toc_ptr;
1103
1104         *(uint64_t *)fw_autoload_mask |= 0x1;
1105
1106         DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1107
1108         data = adev->psp.toc.start_addr;
1109         size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1110
1111         toc_ptr = (uint64_t *)data + size / 8 - 1;
1112         *toc_ptr = *(uint64_t *)fw_autoload_mask;
1113
1114         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1115                                         data, size, fw_autoload_mask);
1116 }
1117
1118 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1119                                                         uint32_t *fw_autoload_mask)
1120 {
1121         const __le32 *fw_data;
1122         uint32_t fw_size;
1123         const struct gfx_firmware_header_v1_0 *cp_hdr;
1124         const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1125         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1126         const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1127         uint16_t version_major, version_minor;
1128
1129         if (adev->gfx.rs64_enable) {
1130                 /* pfp ucode */
1131                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1132                         adev->gfx.pfp_fw->data;
1133                 /* instruction */
1134                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1135                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1136                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1137                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1138                                                 fw_data, fw_size, fw_autoload_mask);
1139                 /* data */
1140                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1141                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1142                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1143                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1144                                                 fw_data, fw_size, fw_autoload_mask);
1145                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1146                                                 fw_data, fw_size, fw_autoload_mask);
1147                 /* me ucode */
1148                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1149                         adev->gfx.me_fw->data;
1150                 /* instruction */
1151                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1152                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1153                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1154                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1155                                                 fw_data, fw_size, fw_autoload_mask);
1156                 /* data */
1157                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1158                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1159                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1160                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1161                                                 fw_data, fw_size, fw_autoload_mask);
1162                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1163                                                 fw_data, fw_size, fw_autoload_mask);
1164                 /* mec ucode */
1165                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1166                         adev->gfx.mec_fw->data;
1167                 /* instruction */
1168                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1169                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1170                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1171                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1172                                                 fw_data, fw_size, fw_autoload_mask);
1173                 /* data */
1174                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1175                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1176                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1177                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1178                                                 fw_data, fw_size, fw_autoload_mask);
1179                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1180                                                 fw_data, fw_size, fw_autoload_mask);
1181                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1182                                                 fw_data, fw_size, fw_autoload_mask);
1183                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1184                                                 fw_data, fw_size, fw_autoload_mask);
1185         } else {
1186                 /* pfp ucode */
1187                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1188                         adev->gfx.pfp_fw->data;
1189                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1190                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1191                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1192                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1193                                                 fw_data, fw_size, fw_autoload_mask);
1194
1195                 /* me ucode */
1196                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1197                         adev->gfx.me_fw->data;
1198                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1199                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1200                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1201                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1202                                                 fw_data, fw_size, fw_autoload_mask);
1203
1204                 /* mec ucode */
1205                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1206                         adev->gfx.mec_fw->data;
1207                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1208                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1209                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1210                         cp_hdr->jt_size * 4;
1211                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1212                                                 fw_data, fw_size, fw_autoload_mask);
1213         }
1214
1215         /* rlc ucode */
1216         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1217                 adev->gfx.rlc_fw->data;
1218         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1219                         le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1220         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1221         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1222                                         fw_data, fw_size, fw_autoload_mask);
1223
1224         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1225         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1226         if (version_major == 2) {
1227                 if (version_minor >= 2) {
1228                         rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1229
1230                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1231                                         le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1232                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1233                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1234                                         fw_data, fw_size, fw_autoload_mask);
1235
1236                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1237                                         le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1238                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1239                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1240                                         fw_data, fw_size, fw_autoload_mask);
1241                 }
1242         }
1243 }
1244
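     /*
      * Copy the SDMA context-thread and control-thread ucode images from
      * the SDMA v2_0 firmware header into the RLC autoload buffer.
      */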
1245 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1246                                                         uint32_t *fw_autoload_mask)
1247 {
1248         const __le32 *fw_data;
1249         uint32_t fw_size;
1250         const struct sdma_firmware_header_v2_0 *sdma_hdr;
1251
1252         sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1253                 adev->sdma.instance[0].fw->data;
1254         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1255                         le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1256         fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1257
1258         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1259                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1260
1261         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1262                         le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1263         fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1264
1265         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1266                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1267 }
1268
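     /*
      * Copy the MES ucode and ucode-data (stack) images for both MES pipes
      * into the RLC autoload buffer, using the matching per-pipe firmware IDs.
      */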
1269 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1270                                                         uint32_t *fw_autoload_mask)
1271 {
1272         const __le32 *fw_data;
1273         unsigned fw_size;
1274         const struct mes_firmware_header_v1_0 *mes_hdr;
1275         int pipe, ucode_id, data_id;
1276
1277         for (pipe = 0; pipe < 2; pipe++) {
1278                 if (pipe == 0) {
1279                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1280                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1281                 } else {
1282                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1283                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1284                 }
1285
1286                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1287                         adev->mes.fw[pipe]->data;
1288
1289                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1290                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1291                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1292
1293                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1294                                 ucode_id, fw_data, fw_size, fw_autoload_mask);
1295
1296                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1297                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1298                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1299
1300                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1301                                 data_id, fw_data, fw_size, fw_autoload_mask);
1302         }
1303 }
1304
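     /*
      * RLC backdoor autoload: stage the SDMA, GFX, MES and TOC firmwares in
      * the autoload buffer, point the RLC bootloader at the staged RLC_G
      * image, then load, set up and start the IMU and disable GPA mode.
      */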
1305 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1306 {
1307         uint32_t rlc_g_offset, rlc_g_size;
1308         uint64_t gpu_addr;
1309         uint32_t autoload_fw_id[2];
1310
1311         memset(autoload_fw_id, 0, sizeof(autoload_fw_id));
1312
1313         /* RLC autoload sequence 2: copy ucode */
1314         gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1315         gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1316         gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1317         gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1318
1319         rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1320         rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1321         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1322
1323         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1324         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1325
1326         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1327
1328         /* RLC autoload sequence 3: load IMU fw */
1329         if (adev->gfx.imu.funcs->load_microcode)
1330                 adev->gfx.imu.funcs->load_microcode(adev);
1331         /* RLC autoload sequence 4 init IMU fw */
1332         if (adev->gfx.imu.funcs->setup_imu)
1333                 adev->gfx.imu.funcs->setup_imu(adev);
1334         if (adev->gfx.imu.funcs->start_imu)
1335                 adev->gfx.imu.funcs->start_imu(adev);
1336
1337         /* RLC autoload sequence 5 disable gpa mode */
1338         gfx_v11_0_disable_gpa_mode(adev);
1339
1340         return 0;
1341 }
1342
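     /*
      * Software-side init: derive the ME/MEC topology from the GC IP version,
      * register the CP interrupt sources, create the RLC and MEC objects, the
      * gfx/compute (and optionally KIQ) rings, the MQD backing store and, for
      * RLC backdoor autoload, the autoload buffer.
      */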
1343 static int gfx_v11_0_sw_init(void *handle)
1344 {
1345         int i, j, k, r, ring_id = 0;
1346         struct amdgpu_kiq *kiq;
1347         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1348
1349         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1350         case IP_VERSION(11, 0, 0):
1351         case IP_VERSION(11, 0, 2):
1352         case IP_VERSION(11, 0, 3):
1353                 adev->gfx.me.num_me = 1;
1354                 adev->gfx.me.num_pipe_per_me = 1;
1355                 adev->gfx.me.num_queue_per_pipe = 1;
1356                 adev->gfx.mec.num_mec = 2;
1357                 adev->gfx.mec.num_pipe_per_mec = 4;
1358                 adev->gfx.mec.num_queue_per_pipe = 4;
1359                 break;
1360         case IP_VERSION(11, 0, 1):
1361         case IP_VERSION(11, 0, 4):
1362         case IP_VERSION(11, 5, 0):
1363                 adev->gfx.me.num_me = 1;
1364                 adev->gfx.me.num_pipe_per_me = 1;
1365                 adev->gfx.me.num_queue_per_pipe = 1;
1366                 adev->gfx.mec.num_mec = 1;
1367                 adev->gfx.mec.num_pipe_per_mec = 4;
1368                 adev->gfx.mec.num_queue_per_pipe = 4;
1369                 break;
1370         default:
1371                 adev->gfx.me.num_me = 1;
1372                 adev->gfx.me.num_pipe_per_me = 1;
1373                 adev->gfx.me.num_queue_per_pipe = 1;
1374                 adev->gfx.mec.num_mec = 1;
1375                 adev->gfx.mec.num_pipe_per_mec = 4;
1376                 adev->gfx.mec.num_queue_per_pipe = 8;
1377                 break;
1378         }
1379
1380         /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
1381         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
1382             amdgpu_sriov_is_pp_one_vf(adev))
1383                 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
1384
1385         /* EOP Event */
1386         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1387                               GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1388                               &adev->gfx.eop_irq);
1389         if (r)
1390                 return r;
1391
1392         /* Privileged reg */
1393         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1394                               GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1395                               &adev->gfx.priv_reg_irq);
1396         if (r)
1397                 return r;
1398
1399         /* Privileged inst */
1400         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1401                               GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1402                               &adev->gfx.priv_inst_irq);
1403         if (r)
1404                 return r;
1405
1406         /* FED error */
1407         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1408                                   GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
1409                                   &adev->gfx.rlc_gc_fed_irq);
1410         if (r)
1411                 return r;
1412
1413         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1414
1415         gfx_v11_0_me_init(adev);
1416
1417         r = gfx_v11_0_rlc_init(adev);
1418         if (r) {
1419                 DRM_ERROR("Failed to init rlc BOs!\n");
1420                 return r;
1421         }
1422
1423         r = gfx_v11_0_mec_init(adev);
1424         if (r) {
1425                 DRM_ERROR("Failed to init MEC BOs!\n");
1426                 return r;
1427         }
1428
1429         /* set up the gfx ring */
1430         for (i = 0; i < adev->gfx.me.num_me; i++) {
1431                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1432                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1433                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1434                                         continue;
1435
1436                                 r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1437                                                             i, k, j);
1438                                 if (r)
1439                                         return r;
1440                                 ring_id++;
1441                         }
1442                 }
1443         }
1444
1445         ring_id = 0;
1446         /* set up the compute queues - allocate horizontally across pipes */
1447         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1448                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1449                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1450                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
1451                                                                      k, j))
1452                                         continue;
1453
1454                                 r = gfx_v11_0_compute_ring_init(adev, ring_id,
1455                                                                 i, k, j);
1456                                 if (r)
1457                                         return r;
1458
1459                                 ring_id++;
1460                         }
1461                 }
1462         }
1463
1464         if (!adev->enable_mes_kiq) {
1465                 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1466                 if (r) {
1467                         DRM_ERROR("Failed to init KIQ BOs!\n");
1468                         return r;
1469                 }
1470
1471                 kiq = &adev->gfx.kiq[0];
1472                 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
1473                 if (r)
1474                         return r;
1475         }
1476
1477         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1478         if (r)
1479                 return r;
1480
1481         /* allocate visible FB for rlc auto-loading fw */
1482         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1483                 r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1484                 if (r)
1485                         return r;
1486         }
1487
1488         r = gfx_v11_0_gpu_early_init(adev);
1489         if (r)
1490                 return r;
1491
1492         if (amdgpu_gfx_ras_sw_init(adev)) {
1493                 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1494                 return -EINVAL;
1495         }
1496
1497         return 0;
1498 }
1499
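     /* Free the PFP firmware instruction and data BOs. */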
1500 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1501 {
1502         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1503                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1504                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1505
1506         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1507                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1508                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1509 }
1510
1511 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1512 {
1513         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1514                               &adev->gfx.me.me_fw_gpu_addr,
1515                               (void **)&adev->gfx.me.me_fw_ptr);
1516
1517         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1518                                &adev->gfx.me.me_fw_data_gpu_addr,
1519                                (void **)&adev->gfx.me.me_fw_data_ptr);
1520 }
1521
1522 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1523 {
1524         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1525                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
1526                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1527 }
1528
1529 static int gfx_v11_0_sw_fini(void *handle)
1530 {
1531         int i;
1532         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1533
1534         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1535                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1536         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1537                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1538
1539         amdgpu_gfx_mqd_sw_fini(adev, 0);
1540
1541         if (!adev->enable_mes_kiq) {
1542                 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1543                 amdgpu_gfx_kiq_fini(adev, 0);
1544         }
1545
1546         gfx_v11_0_pfp_fini(adev);
1547         gfx_v11_0_me_fini(adev);
1548         gfx_v11_0_rlc_fini(adev);
1549         gfx_v11_0_mec_fini(adev);
1550
1551         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1552                 gfx_v11_0_rlc_autoload_buffer_fini(adev);
1553
1554         gfx_v11_0_free_microcode(adev);
1555
1556         return 0;
1557 }
1558
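     /*
      * Select the SE/SA/instance targeted by subsequent indexed register
      * accesses via GRBM_GFX_INDEX; 0xffffffff selects broadcast writes.
      */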
1559 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1560                                    u32 sh_num, u32 instance, int xcc_id)
1561 {
1562         u32 data;
1563
1564         if (instance == 0xffffffff)
1565                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1566                                      INSTANCE_BROADCAST_WRITES, 1);
1567         else
1568                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1569                                      instance);
1570
1571         if (se_num == 0xffffffff)
1572                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1573                                      1);
1574         else
1575                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1576
1577         if (sh_num == 0xffffffff)
1578                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1579                                      1);
1580         else
1581                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1582
1583         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1584 }
1585
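     /* Build a bitmap of shader arrays that are neither fuse- nor user-disabled. */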
1586 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1587 {
1588         u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1589
1590         gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
1591         gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1592                                            CC_GC_SA_UNIT_DISABLE,
1593                                            SA_DISABLE);
1594         gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
1595         gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1596                                                  GC_USER_SA_UNIT_DISABLE,
1597                                                  SA_DISABLE);
1598         sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1599                                             adev->gfx.config.max_shader_engines);
1600
1601         return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1602 }
1603
1604 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1605 {
1606         u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1607         u32 rb_mask;
1608
1609         gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1610         gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1611                                             CC_RB_BACKEND_DISABLE,
1612                                             BACKEND_DISABLE);
1613         gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1614         gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1615                                                  GC_USER_RB_BACKEND_DISABLE,
1616                                                  BACKEND_DISABLE);
1617         rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1618                                             adev->gfx.config.max_shader_engines);
1619
1620         return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1621 }
1622
1623 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1624 {
1625         u32 rb_bitmap_width_per_sa;
1626         u32 max_sa;
1627         u32 active_sa_bitmap;
1628         u32 global_active_rb_bitmap;
1629         u32 active_rb_bitmap = 0;
1630         u32 i;
1631
1632         /* query sa bitmap from SA_UNIT_DISABLE registers */
1633         active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
1634         /* query rb bitmap from RB_BACKEND_DISABLE registers */
1635         global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
1636
1637         /* generate active rb bitmap according to active sa bitmap */
1638         max_sa = adev->gfx.config.max_shader_engines *
1639                  adev->gfx.config.max_sh_per_se;
1640         rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1641                                  adev->gfx.config.max_sh_per_se;
1642         for (i = 0; i < max_sa; i++) {
1643                 if (active_sa_bitmap & (1 << i))
1644                         active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
1645         }
1646
1647         active_rb_bitmap |= global_active_rb_bitmap;
1648         adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1649         adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1650 }
1651
1652 #define DEFAULT_SH_MEM_BASES    (0x6000)
1653 #define LDS_APP_BASE           0x1
1654 #define SCRATCH_APP_BASE       0x2
1655
1656 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1657 {
1658         int i;
1659         uint32_t sh_mem_bases;
1660         uint32_t data;
1661
1662         /*
1663          * Configure apertures:
1664          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1665          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1666          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1667          */
1668         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1669                         SCRATCH_APP_BASE;
1670
1671         mutex_lock(&adev->srbm_mutex);
1672         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1673                 soc21_grbm_select(adev, 0, 0, 0, i);
1674                 /* CP and shaders */
1675                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1676                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1677
1678                 /* Enable trap for each kfd vmid. */
1679                 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1680                 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1681                 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1682         }
1683         soc21_grbm_select(adev, 0, 0, 0, 0);
1684         mutex_unlock(&adev->srbm_mutex);
1685
1686         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1687            access. These should be enabled by FW for target VMIDs. */
1688         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1689                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1690                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1691                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1692                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1693         }
1694 }
1695
1696 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1697 {
1698         int vmid;
1699
1700         /*
1701          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1702          * access. Compute VMIDs should be enabled by FW for target VMIDs,
1703          * the driver can enable them for graphics. VMID0 should maintain
1704          * access so that HWS firmware can save/restore entries.
1705          */
1706         for (vmid = 1; vmid < 16; vmid++) {
1707                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1708                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1709                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1710                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1711         }
1712 }
1713
1714 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1715 {
1716         /* TODO: harvest feature to be added later. */
1717 }
1718
1719 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1720 {
1721         /* TCCs are global (not instanced). */
1722         uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1723                                RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1724
1725         adev->gfx.config.tcc_disabled_mask =
1726                 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1727                 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1728 }
1729
1730 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1731 {
1732         u32 tmp;
1733         int i;
1734
1735         if (!amdgpu_sriov_vf(adev))
1736                 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1737
1738         gfx_v11_0_setup_rb(adev);
1739         gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1740         gfx_v11_0_get_tcc_info(adev);
1741         adev->gfx.config.pa_sc_tile_steering_override = 0;
1742
1743         /* Set whether texture coordinate truncation is conformant. */
1744         tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
1745         adev->gfx.config.ta_cntl2_truncate_coord_mode =
1746                 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
1747
1748         /* XXX SH_MEM regs */
1749         /* where to put LDS, scratch, GPUVM in FSA64 space */
1750         mutex_lock(&adev->srbm_mutex);
1751         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
1752                 soc21_grbm_select(adev, 0, 0, 0, i);
1753                 /* CP and shaders */
1754                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1755                 if (i != 0) {
1756                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1757                                 (adev->gmc.private_aperture_start >> 48));
1758                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1759                                 (adev->gmc.shared_aperture_start >> 48));
1760                         WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1761                 }
1762         }
1763         soc21_grbm_select(adev, 0, 0, 0, 0);
1764
1765         mutex_unlock(&adev->srbm_mutex);
1766
1767         gfx_v11_0_init_compute_vmid(adev);
1768         gfx_v11_0_init_gds_vmid(adev);
1769 }
1770
1771 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1772                                                bool enable)
1773 {
1774         u32 tmp;
1775
1776         if (amdgpu_sriov_vf(adev))
1777                 return;
1778
1779         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1780
1781         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1782                             enable ? 1 : 0);
1783         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1784                             enable ? 1 : 0);
1785         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1786                             enable ? 1 : 0);
1787         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1788                             enable ? 1 : 0);
1789
1790         WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1791 }
1792
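     /* Program the clear-state buffer (CSB) address and size into the RLC. */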
1793 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1794 {
1795         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1796
1797         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1798                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1799         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1800                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1801         WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1802
1803         return 0;
1804 }
1805
1806 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1807 {
1808         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1809
1810         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1811         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1812 }
1813
1814 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1815 {
1816         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1817         udelay(50);
1818         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1819         udelay(50);
1820 }
1821
1822 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1823                                              bool enable)
1824 {
1825         uint32_t rlc_pg_cntl;
1826
1827         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1828
1829         if (!enable) {
1830                 /* RLC_PG_CNTL[23] = 0 (default)
1831                  * RLC will wait for handshake acks with SMU
1832                  * GFXOFF will be enabled
1833                  * RLC_PG_CNTL[23] = 1
1834                  * RLC will not issue any message to SMU
1835                  * hence no handshake between SMU & RLC
1836                  * GFXOFF will be disabled
1837                  */
1838                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1839         } else
1840                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1841         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1842 }
1843
1844 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1845 {
1846         /* TODO: re-enable the rlc & smu handshake once the smu
1847          * and gfxoff features work as expected */
1848         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1849                 gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1850
1851         WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1852         udelay(50);
1853 }
1854
1855 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1856 {
1857         uint32_t tmp;
1858
1859         /* enable Save Restore Machine */
1860         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1861         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1862         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1863         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1864 }
1865
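     /*
      * Backdoor-load the RLC_G ucode by streaming it through the
      * RLC_GPM_UCODE_ADDR/DATA register pair.
      */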
1866 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1867 {
1868         const struct rlc_firmware_header_v2_0 *hdr;
1869         const __le32 *fw_data;
1870         unsigned i, fw_size;
1871
1872         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1873         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1874                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1875         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1876
1877         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1878                      RLCG_UCODE_LOADING_START_ADDRESS);
1879
1880         for (i = 0; i < fw_size; i++)
1881                 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1882                              le32_to_cpup(fw_data++));
1883
1884         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1885 }
1886
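     /*
      * Load the RLC LX6 IRAM and DRAM images (rlc firmware header v2_2),
      * then enable PDEBUG and release the LX6 reset.
      */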
1887 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1888 {
1889         const struct rlc_firmware_header_v2_2 *hdr;
1890         const __le32 *fw_data;
1891         unsigned i, fw_size;
1892         u32 tmp;
1893
1894         hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1895
1896         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1897                         le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1898         fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1899
1900         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1901
1902         for (i = 0; i < fw_size; i++) {
1903                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1904                         msleep(1);
1905                 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1906                                 le32_to_cpup(fw_data++));
1907         }
1908
1909         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1910
1911         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1912                         le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1913         fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1914
1915         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1916         for (i = 0; i < fw_size; i++) {
1917                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1918                         msleep(1);
1919                 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1920                                 le32_to_cpup(fw_data++));
1921         }
1922
1923         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1924
1925         tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1926         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1927         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1928         WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1929 }
1930
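     /*
      * Load the RLC-P (pace) and RLC-V (GPU IOV) ucode images (rlc firmware
      * header v2_3) and enable their execution.
      */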
1931 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1932 {
1933         const struct rlc_firmware_header_v2_3 *hdr;
1934         const __le32 *fw_data;
1935         unsigned i, fw_size;
1936         u32 tmp;
1937
1938         hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1939
1940         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1941                         le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1942         fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1943
1944         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1945
1946         for (i = 0; i < fw_size; i++) {
1947                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1948                         msleep(1);
1949                 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1950                                 le32_to_cpup(fw_data++));
1951         }
1952
1953         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
1954
1955         tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1956         tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1957         WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
1958
1959         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1960                         le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
1961         fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
1962
1963         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
1964
1965         for (i = 0; i < fw_size; i++) {
1966                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1967                         msleep(1);
1968                 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
1969                                 le32_to_cpup(fw_data++));
1970         }
1971
1972         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
1973
1974         tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
1975         tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
1976         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
1977 }
1978
1979 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
1980 {
1981         const struct rlc_firmware_header_v2_0 *hdr;
1982         uint16_t version_major;
1983         uint16_t version_minor;
1984
1985         if (!adev->gfx.rlc_fw)
1986                 return -EINVAL;
1987
1988         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1989         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1990
1991         version_major = le16_to_cpu(hdr->header.header_version_major);
1992         version_minor = le16_to_cpu(hdr->header.header_version_minor);
1993
1994         if (version_major == 2) {
1995                 gfx_v11_0_load_rlcg_microcode(adev);
1996                 if (amdgpu_dpm == 1) {
1997                         if (version_minor >= 2)
1998                                 gfx_v11_0_load_rlc_iram_dram_microcode(adev);
1999                         if (version_minor == 3)
2000                                 gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2001                 }
2002
2003                 return 0;
2004         }
2005
2006         return -EINVAL;
2007 }
2008
2009 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2010 {
2011         int r;
2012
2013         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2014                 gfx_v11_0_init_csb(adev);
2015
2016                 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2017                         gfx_v11_0_rlc_enable_srm(adev);
2018         } else {
2019                 if (amdgpu_sriov_vf(adev)) {
2020                         gfx_v11_0_init_csb(adev);
2021                         return 0;
2022                 }
2023
2024                 adev->gfx.rlc.funcs->stop(adev);
2025
2026                 /* disable CG */
2027                 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2028
2029                 /* disable PG */
2030                 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2031
2032                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2033                         /* legacy rlc firmware loading */
2034                         r = gfx_v11_0_rlc_load_microcode(adev);
2035                         if (r)
2036                                 return r;
2037                 }
2038
2039                 gfx_v11_0_init_csb(adev);
2040
2041                 adev->gfx.rlc.funcs->start(adev);
2042         }
2043         return 0;
2044 }
2045
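     /*
      * Legacy (non-RS64) path: invalidate the ME L1 instruction cache and
      * program its base registers with the ME ucode address.
      */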
2046 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2047 {
2048         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2049         uint32_t tmp;
2050         int i;
2051
2052         /* Trigger an invalidation of the L1 instruction caches */
2053         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2054         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2055         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2056
2057         /* Wait for invalidation complete */
2058         for (i = 0; i < usec_timeout; i++) {
2059                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2060                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2061                                         INVALIDATE_CACHE_COMPLETE))
2062                         break;
2063                 udelay(1);
2064         }
2065
2066         if (i >= usec_timeout) {
2067                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2068                 return -EINVAL;
2069         }
2070
2071         if (amdgpu_emu_mode == 1)
2072                 adev->hdp.funcs->flush_hdp(adev, NULL);
2073
2074         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2075         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2076         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2077         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2078         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2079         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2080
2081         /* Program me ucode address into instruction cache address register */
2082         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2083                         lower_32_bits(addr) & 0xFFFFF000);
2084         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2085                         upper_32_bits(addr));
2086
2087         return 0;
2088 }
2089
2090 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2091 {
2092         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2093         uint32_t tmp;
2094         int i;
2095
2096         /* Trigger an invalidation of the L1 instruction caches */
2097         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2098         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2099         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2100
2101         /* Wait for invalidation complete */
2102         for (i = 0; i < usec_timeout; i++) {
2103                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2104                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2105                                         INVALIDATE_CACHE_COMPLETE))
2106                         break;
2107                 udelay(1);
2108         }
2109
2110         if (i >= usec_timeout) {
2111                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2112                 return -EINVAL;
2113         }
2114
2115         if (amdgpu_emu_mode == 1)
2116                 adev->hdp.funcs->flush_hdp(adev, NULL);
2117
2118         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2119         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2120         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2121         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2122         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2123         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2124
2125         /* Program pfp ucode address into instruction cache address register */
2126         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2127                         lower_32_bits(addr) & 0xFFFFF000);
2128         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2129                         upper_32_bits(addr));
2130
2131         return 0;
2132 }
2133
2134 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2135 {
2136         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2137         uint32_t tmp;
2138         int i;
2139
2140         /* Trigger an invalidation of the L1 instruction caches */
2141         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2142         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2143
2144         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2145
2146         /* Wait for invalidation complete */
2147         for (i = 0; i < usec_timeout; i++) {
2148                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2149                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2150                                         INVALIDATE_CACHE_COMPLETE))
2151                         break;
2152                 udelay(1);
2153         }
2154
2155         if (i >= usec_timeout) {
2156                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2157                 return -EINVAL;
2158         }
2159
2160         if (amdgpu_emu_mode == 1)
2161                 adev->hdp.funcs->flush_hdp(adev, NULL);
2162
2163         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2164         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2165         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2166         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2167         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2168
2169         /* Program mec1 ucode address into instruction cache address register */
2170         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2171                         lower_32_bits(addr) & 0xFFFFF000);
2172         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2173                         upper_32_bits(addr));
2174
2175         return 0;
2176 }
2177
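     /*
      * RS64 PFP setup: program and invalidate the instruction cache, prime
      * it, set the per-pipe program-counter start address, pulse the pipe
      * reset so the new start address takes effect, then configure and
      * invalidate the RS64 data cache.
      */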
2178 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2179 {
2180         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2181         uint32_t tmp;
2182         unsigned i, pipe_id;
2183         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2184
2185         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2186                 adev->gfx.pfp_fw->data;
2187
2188         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2189                 lower_32_bits(addr));
2190         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2191                 upper_32_bits(addr));
2192
2193         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2194         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2195         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2196         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2197         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2198
2199         /*
2200          * Programming any of the CP_PFP_IC_BASE registers
2201          * forces invalidation of the PFP L1 I$. Wait for the
2202          * invalidation complete
2203          */
2204         for (i = 0; i < usec_timeout; i++) {
2205                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2206                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2207                         INVALIDATE_CACHE_COMPLETE))
2208                         break;
2209                 udelay(1);
2210         }
2211
2212         if (i >= usec_timeout) {
2213                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2214                 return -EINVAL;
2215         }
2216
2217         /* Prime the L1 instruction caches */
2218         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2219         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2220         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2221         /* Wait for the instruction cache to be primed */
2222         for (i = 0; i < usec_timeout; i++) {
2223                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2224                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2225                         ICACHE_PRIMED))
2226                         break;
2227                 udelay(1);
2228         }
2229
2230         if (i >= usec_timeout) {
2231                 dev_err(adev->dev, "failed to prime instruction cache\n");
2232                 return -EINVAL;
2233         }
2234
2235         mutex_lock(&adev->srbm_mutex);
2236         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2237                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2238                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2239                         (pfp_hdr->ucode_start_addr_hi << 30) |
2240                         (pfp_hdr->ucode_start_addr_lo >> 2));
2241                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2242                         pfp_hdr->ucode_start_addr_hi >> 2);
2243
2244                 /*
2245                  * Program CP_ME_CNTL to reset given PIPE to take
2246                  * effect of CP_PFP_PRGRM_CNTR_START.
2247                  */
2248                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2249                 if (pipe_id == 0)
2250                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2251                                         PFP_PIPE0_RESET, 1);
2252                 else
2253                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2254                                         PFP_PIPE1_RESET, 1);
2255                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2256
2257                 /* Clear the pfp pipe reset bit for this pipe. */
2258                 if (pipe_id == 0)
2259                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2260                                         PFP_PIPE0_RESET, 0);
2261                 else
2262                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2263                                         PFP_PIPE1_RESET, 0);
2264                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2265
2266                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2267                         lower_32_bits(addr2));
2268                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2269                         upper_32_bits(addr2));
2270         }
2271         soc21_grbm_select(adev, 0, 0, 0, 0);
2272         mutex_unlock(&adev->srbm_mutex);
2273
2274         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2275         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2276         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2277         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2278
2279         /* Invalidate the data caches */
2280         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2281         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2282         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2283
2284         for (i = 0; i < usec_timeout; i++) {
2285                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2286                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2287                         INVALIDATE_DCACHE_COMPLETE))
2288                         break;
2289                 udelay(1);
2290         }
2291
2292         if (i >= usec_timeout) {
2293                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2294                 return -EINVAL;
2295         }
2296
2297         return 0;
2298 }
2299
2300 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2301 {
2302         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2303         uint32_t tmp;
2304         unsigned i, pipe_id;
2305         const struct gfx_firmware_header_v2_0 *me_hdr;
2306
2307         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2308                 adev->gfx.me_fw->data;
2309
2310         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2311                 lower_32_bits(addr));
2312         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2313                 upper_32_bits(addr));
2314
2315         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2316         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2317         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2318         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2319         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2320
2321         /*
2322          * Programming any of the CP_ME_IC_BASE registers
2323          * forces invalidation of the ME L1 I$. Wait for the
2324          * invalidation complete
2325          */
2326         for (i = 0; i < usec_timeout; i++) {
2327                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2328                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2329                         INVALIDATE_CACHE_COMPLETE))
2330                         break;
2331                 udelay(1);
2332         }
2333
2334         if (i >= usec_timeout) {
2335                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2336                 return -EINVAL;
2337         }
2338
2339         /* Prime the instruction caches */
2340         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2341         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2342         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2343
2344         /* Wait for the instruction cache to be primed */
2345         for (i = 0; i < usec_timeout; i++) {
2346                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2347                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2348                         ICACHE_PRIMED))
2349                         break;
2350                 udelay(1);
2351         }
2352
2353         if (i >= usec_timeout) {
2354                 dev_err(adev->dev, "failed to prime instruction cache\n");
2355                 return -EINVAL;
2356         }
2357
2358         mutex_lock(&adev->srbm_mutex);
2359         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2360                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2361                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2362                         (me_hdr->ucode_start_addr_hi << 30) |
2363                         (me_hdr->ucode_start_addr_lo >> 2));
2364                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2365                         me_hdr->ucode_start_addr_hi >> 2);
2366
2367                 /*
2368                  * Program CP_ME_CNTL to reset given PIPE to take
2369                  * effect of CP_ME_PRGRM_CNTR_START.
2370                  */
2371                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2372                 if (pipe_id == 0)
2373                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2374                                         ME_PIPE0_RESET, 1);
2375                 else
2376                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2377                                         ME_PIPE1_RESET, 1);
2378                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2379
2380                 /* Clear the me pipe reset bit for this pipe. */
2381                 if (pipe_id == 0)
2382                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2383                                         ME_PIPE0_RESET, 0);
2384                 else
2385                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2386                                         ME_PIPE1_RESET, 0);
2387                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2388
2389                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2390                         lower_32_bits(addr2));
2391                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2392                         upper_32_bits(addr2));
2393         }
2394         soc21_grbm_select(adev, 0, 0, 0, 0);
2395         mutex_unlock(&adev->srbm_mutex);
2396
2397         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2398         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2399         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2400         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2401
2402         /* Invalidate the data caches */
2403         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2404         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2405         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2406
2407         for (i = 0; i < usec_timeout; i++) {
2408                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2409                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2410                         INVALIDATE_DCACHE_COMPLETE))
2411                         break;
2412                 udelay(1);
2413         }
2414
2415         if (i >= usec_timeout) {
2416                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2417                 return -EINVAL;
2418         }
2419
2420         return 0;
2421 }
2422
2423 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2424 {
2425         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2426         uint32_t tmp;
2427         unsigned i;
2428         const struct gfx_firmware_header_v2_0 *mec_hdr;
2429
2430         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2431                 adev->gfx.mec_fw->data;
2432
2433         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2434         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2435         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2436         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2437         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2438
2439         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2440         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2441         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2442         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2443
2444         mutex_lock(&adev->srbm_mutex);
2445         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2446                 soc21_grbm_select(adev, 1, i, 0, 0);
2447
2448                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2449                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2450                      upper_32_bits(addr2));
2451
2452                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2453                                         mec_hdr->ucode_start_addr_lo >> 2 |
2454                                         mec_hdr->ucode_start_addr_hi << 30);
2455                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2456                                         mec_hdr->ucode_start_addr_hi >> 2);
2457
2458                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2459                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2460                      upper_32_bits(addr));
2461         }
2462         soc21_grbm_select(adev, 0, 0, 0, 0);
2463         mutex_unlock(&adev->srbm_mutex);
2464
2465         /* Trigger an invalidation of the MEC data cache */
2466         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2467         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2468         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2469
2470         /* Wait for invalidation complete */
2471         for (i = 0; i < usec_timeout; i++) {
2472                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2473                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2474                                        INVALIDATE_DCACHE_COMPLETE))
2475                         break;
2476                 udelay(1);
2477         }
2478
2479         if (i >= usec_timeout) {
2480                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2481                 return -EINVAL;
2482         }
2483
2484         /* Trigger an invalidation of the L1 instruction caches */
2485         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2486         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2487         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2488
2489         /* Wait for invalidation complete */
2490         for (i = 0; i < usec_timeout; i++) {
2491                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2492                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2493                                        INVALIDATE_CACHE_COMPLETE))
2494                         break;
2495                 udelay(1);
2496         }
2497
2498         if (i >= usec_timeout) {
2499                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2500                 return -EINVAL;
2501         }
2502
2503         return 0;
2504 }
2505
2506 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2507 {
2508         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2509         const struct gfx_firmware_header_v2_0 *me_hdr;
2510         const struct gfx_firmware_header_v2_0 *mec_hdr;
2511         uint32_t pipe_id, tmp;
2512
2513         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2514                 adev->gfx.mec_fw->data;
2515         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2516                 adev->gfx.me_fw->data;
2517         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2518                 adev->gfx.pfp_fw->data;
2519
2520         /* config pfp program start addr */
2521         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2522                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2523                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2524                         (pfp_hdr->ucode_start_addr_hi << 30) |
2525                         (pfp_hdr->ucode_start_addr_lo >> 2));
2526                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2527                         pfp_hdr->ucode_start_addr_hi >> 2);
2528         }
2529         soc21_grbm_select(adev, 0, 0, 0, 0);
2530
2531         /* reset pfp pipe */
2532         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2533         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2534         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2535         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2536
2537         /* clear pfp pipe reset */
2538         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2539         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2540         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2541
2542         /* config me program start addr */
2543         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2544                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2545                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2546                         (me_hdr->ucode_start_addr_hi << 30) |
2547                         (me_hdr->ucode_start_addr_lo >> 2));
2548                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2549                         me_hdr->ucode_start_addr_hi >> 2);
2550         }
2551         soc21_grbm_select(adev, 0, 0, 0, 0);
2552
2553         /* reset me pipe */
2554         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2555         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2556         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2557         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2558
2559         /* clear me pipe reset */
2560         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2561         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2562         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2563
2564         /* config mec program start addr */
2565         for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2566                 soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2567                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2568                                         mec_hdr->ucode_start_addr_lo >> 2 |
2569                                         mec_hdr->ucode_start_addr_hi << 30);
2570                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2571                                         mec_hdr->ucode_start_addr_hi >> 2);
2572         }
2573         soc21_grbm_select(adev, 0, 0, 0, 0);
2574
2575         /* reset mec pipe */
2576         tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2577         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2578         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2579         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2580         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2581         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2582
2583         /* clear mec pipe reset */
2584         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2585         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2586         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2587         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2588         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2589 }
2590
2591 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2592 {
2593         uint32_t cp_status;
2594         uint32_t bootload_status;
2595         int i, r;
2596         uint64_t addr, addr2;
2597
2598         for (i = 0; i < adev->usec_timeout; i++) {
2599                 cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2600
2601                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2602                             IP_VERSION(11, 0, 1) ||
2603                     amdgpu_ip_version(adev, GC_HWIP, 0) ==
2604                             IP_VERSION(11, 0, 4) ||
2605                     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0))
2606                         bootload_status = RREG32_SOC15(GC, 0,
2607                                         regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2608                 else
2609                         bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2610
2611                 if ((cp_status == 0) &&
2612                     (REG_GET_FIELD(bootload_status,
2613                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2614                         break;
2615                 }
2616                 udelay(1);
2617         }
2618
2619         if (i >= adev->usec_timeout) {
2620                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2621                 return -ETIMEDOUT;
2622         }
2623
2624         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2625                 if (adev->gfx.rs64_enable) {
2626                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2627                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2628                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2629                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2630                         r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2631                         if (r)
2632                                 return r;
2633                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2634                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2635                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2636                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2637                         r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2638                         if (r)
2639                                 return r;
2640                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2641                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2642                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2643                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2644                         r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2645                         if (r)
2646                                 return r;
2647                 } else {
2648                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2649                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2650                         r = gfx_v11_0_config_me_cache(adev, addr);
2651                         if (r)
2652                                 return r;
2653                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2654                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2655                         r = gfx_v11_0_config_pfp_cache(adev, addr);
2656                         if (r)
2657                                 return r;
2658                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2659                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2660                         r = gfx_v11_0_config_mec_cache(adev, addr);
2661                         if (r)
2662                                 return r;
2663                 }
2664         }
2665
2666         return 0;
2667 }
2668
2669 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2670 {
2671         int i;
2672         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2673
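             /* set or clear the ME/PFP halt bits, then wait for CP_STAT to read back 0 (CP idle) */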
2674         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2675         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2676         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2677
2678         for (i = 0; i < adev->usec_timeout; i++) {
2679                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2680                         break;
2681                 udelay(1);
2682         }
2683
2684         if (i >= adev->usec_timeout)
2685                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2686
2687         return 0;
2688 }
2689
2690 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2691 {
2692         int r;
2693         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2694         const __le32 *fw_data;
2695         unsigned i, fw_size;
2696
2697         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2698                 adev->gfx.pfp_fw->data;
2699
2700         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2701
2702         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2703                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2704         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2705
2706         r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2707                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2708                                       &adev->gfx.pfp.pfp_fw_obj,
2709                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2710                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2711         if (r) {
2712                 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2713                 gfx_v11_0_pfp_fini(adev);
2714                 return r;
2715         }
2716
2717         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2718
2719         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2720         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2721
2722         gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2723
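             /* stream the PFP jump table into the CP_HYP_PFP_UCODE registers */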
2724         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2725
2726         for (i = 0; i < pfp_hdr->jt_size; i++)
2727                 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2728                              le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2729
2730         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2731
2732         return 0;
2733 }
2734
2735 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2736 {
2737         int r;
2738         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2739         const __le32 *fw_ucode, *fw_data;
2740         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2741         uint32_t tmp;
2742         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2743
2744         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2745                 adev->gfx.pfp_fw->data;
2746
2747         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2748
2749         /* instruction */
2750         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2751                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2752         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2753         /* data */
2754         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2755                 le32_to_cpu(pfp_hdr->data_offset_bytes));
2756         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2757
2758         /* 64kb align */
2759         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2760                                       64 * 1024,
2761                                       AMDGPU_GEM_DOMAIN_VRAM |
2762                                       AMDGPU_GEM_DOMAIN_GTT,
2763                                       &adev->gfx.pfp.pfp_fw_obj,
2764                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2765                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2766         if (r) {
2767                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2768                 gfx_v11_0_pfp_fini(adev);
2769                 return r;
2770         }
2771
2772         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2773                                       64 * 1024,
2774                                       AMDGPU_GEM_DOMAIN_VRAM |
2775                                       AMDGPU_GEM_DOMAIN_GTT,
2776                                       &adev->gfx.pfp.pfp_fw_data_obj,
2777                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2778                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2779         if (r) {
2780                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2781                 gfx_v11_0_pfp_fini(adev);
2782                 return r;
2783         }
2784
2785         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2786         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2787
2788         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2789         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2790         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2791         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2792
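             /* in emulation, flush HDP so the CPU-written firmware reaches memory before the CP fetches it */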
2793         if (amdgpu_emu_mode == 1)
2794                 adev->hdp.funcs->flush_hdp(adev, NULL);
2795
2796         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2797                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2798         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2799                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2800
2801         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2802         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2803         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2804         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2805         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2806
2807         /*
2808          * Programming any of the CP_PFP_IC_BASE registers
2809          * forces invalidation of the PFP L1 I$. Wait for the
2810          * invalidation to complete
2811          */
2812         for (i = 0; i < usec_timeout; i++) {
2813                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2814                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2815                         INVALIDATE_CACHE_COMPLETE))
2816                         break;
2817                 udelay(1);
2818         }
2819
2820         if (i >= usec_timeout) {
2821                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2822                 return -EINVAL;
2823         }
2824
2825         /* Prime the L1 instruction caches */
2826         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2827         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2828         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2829         /* Wait for the instruction cache to be primed */
2830         for (i = 0; i < usec_timeout; i++) {
2831                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2832                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2833                         ICACHE_PRIMED))
2834                         break;
2835                 udelay(1);
2836         }
2837
2838         if (i >= usec_timeout) {
2839                 dev_err(adev->dev, "failed to prime instruction cache\n");
2840                 return -EINVAL;
2841         }
2842
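             /* per pipe: set the PFP start address, pulse the pipe reset so it takes effect, and program the data cache base */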
2843         mutex_lock(&adev->srbm_mutex);
2844         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2845                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2846                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2847                         (pfp_hdr->ucode_start_addr_hi << 30) |
2848                         (pfp_hdr->ucode_start_addr_lo >> 2));
2849                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2850                         pfp_hdr->ucode_start_addr_hi >> 2);
2851
2852                 /*
2853                  * Program CP_ME_CNTL to reset the given pipe so that
2854                  * CP_PFP_PRGRM_CNTR_START takes effect.
2855                  */
2856                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2857                 if (pipe_id == 0)
2858                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2859                                         PFP_PIPE0_RESET, 1);
2860                 else
2861                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2862                                         PFP_PIPE1_RESET, 1);
2863                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2864
2865                 /* Clear the pfp pipe reset bit. */
2866                 if (pipe_id == 0)
2867                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2868                                         PFP_PIPE0_RESET, 0);
2869                 else
2870                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2871                                         PFP_PIPE1_RESET, 0);
2872                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2873
2874                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2875                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2876                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2877                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2878         }
2879         soc21_grbm_select(adev, 0, 0, 0, 0);
2880         mutex_unlock(&adev->srbm_mutex);
2881
2882         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2883         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2884         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2885         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2886
2887         /* Invalidate the data caches */
2888         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2889         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2890         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2891
2892         for (i = 0; i < usec_timeout; i++) {
2893                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2894                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2895                         INVALIDATE_DCACHE_COMPLETE))
2896                         break;
2897                 udelay(1);
2898         }
2899
2900         if (i >= usec_timeout) {
2901                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2902                 return -EINVAL;
2903         }
2904
2905         return 0;
2906 }
2907
2908 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2909 {
2910         int r;
2911         const struct gfx_firmware_header_v1_0 *me_hdr;
2912         const __le32 *fw_data;
2913         unsigned i, fw_size;
2914
2915         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2916                 adev->gfx.me_fw->data;
2917
2918         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2919
2920         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2921                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2922         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2923
2924         r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2925                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2926                                       &adev->gfx.me.me_fw_obj,
2927                                       &adev->gfx.me.me_fw_gpu_addr,
2928                                       (void **)&adev->gfx.me.me_fw_ptr);
2929         if (r) {
2930                 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2931                 gfx_v11_0_me_fini(adev);
2932                 return r;
2933         }
2934
2935         memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2936
2937         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2938         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2939
2940         gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2941
2942         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
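             /* stream the ME jump table into the CP_HYP_ME_UCODE registers */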
2943
2944         for (i = 0; i < me_hdr->jt_size; i++)
2945                 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2946                              le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2947
2948         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2949
2950         return 0;
2951 }
2952
2953 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2954 {
2955         int r;
2956         const struct gfx_firmware_header_v2_0 *me_hdr;
2957         const __le32 *fw_ucode, *fw_data;
2958         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2959         uint32_t tmp;
2960         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2961
2962         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2963                 adev->gfx.me_fw->data;
2964
2965         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2966
2967         /* instruction */
2968         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2969                 le32_to_cpu(me_hdr->ucode_offset_bytes));
2970         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2971         /* data */
2972         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2973                 le32_to_cpu(me_hdr->data_offset_bytes));
2974         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2975
2976         /* 64kb align */
2977         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2978                                       64 * 1024,
2979                                       AMDGPU_GEM_DOMAIN_VRAM |
2980                                       AMDGPU_GEM_DOMAIN_GTT,
2981                                       &adev->gfx.me.me_fw_obj,
2982                                       &adev->gfx.me.me_fw_gpu_addr,
2983                                       (void **)&adev->gfx.me.me_fw_ptr);
2984         if (r) {
2985                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2986                 gfx_v11_0_me_fini(adev);
2987                 return r;
2988         }
2989
2990         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2991                                       64 * 1024,
2992                                       AMDGPU_GEM_DOMAIN_VRAM |
2993                                       AMDGPU_GEM_DOMAIN_GTT,
2994                                       &adev->gfx.me.me_fw_data_obj,
2995                                       &adev->gfx.me.me_fw_data_gpu_addr,
2996                                       (void **)&adev->gfx.me.me_fw_data_ptr);
2997         if (r) {
2998                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2999                 gfx_v11_0_me_fini(adev);
3000                 return r;
3001         }
3002
3003         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3004         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3005
3006         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3007         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3008         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3009         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3010
3011         if (amdgpu_emu_mode == 1)
3012                 adev->hdp.funcs->flush_hdp(adev, NULL);
3013
3014         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3015                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3016         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3017                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3018
3019         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3020         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3021         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3022         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3023         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3024
3025         /*
3026          * Programming any of the CP_ME_IC_BASE registers
3027          * forces invalidation of the ME L1 I$. Wait for the
3028          * invalidation to complete
3029          */
3030         for (i = 0; i < usec_timeout; i++) {
3031                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3032                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3033                         INVALIDATE_CACHE_COMPLETE))
3034                         break;
3035                 udelay(1);
3036         }
3037
3038         if (i >= usec_timeout) {
3039                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
3040                 return -EINVAL;
3041         }
3042
3043         /* Prime the instruction caches */
3044         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3045         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3046         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3047
3048         /* Wait for the instruction cache to be primed */
3049         for (i = 0; i < usec_timeout; i++) {
3050                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3051                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3052                         ICACHE_PRIMED))
3053                         break;
3054                 udelay(1);
3055         }
3056
3057         if (i >= usec_timeout) {
3058                 dev_err(adev->dev, "failed to prime instruction cache\n");
3059                 return -EINVAL;
3060         }
3061
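             /* per pipe: set the ME start address, pulse the pipe reset so it takes effect, and program the data cache base */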
3062         mutex_lock(&adev->srbm_mutex);
3063         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3064                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3065                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3066                         (me_hdr->ucode_start_addr_hi << 30) |
3067                         (me_hdr->ucode_start_addr_lo >> 2));
3068                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3069                         me_hdr->ucode_start_addr_hi >> 2);
3070
3071                 /*
3072                  * Program CP_ME_CNTL to reset the given pipe so that
3073                  * CP_ME_PRGRM_CNTR_START takes effect.
3074                  */
3075                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3076                 if (pipe_id == 0)
3077                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3078                                         ME_PIPE0_RESET, 1);
3079                 else
3080                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3081                                         ME_PIPE1_RESET, 1);
3082                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3083
3084                 /* Clear the me pipe reset bit. */
3085                 if (pipe_id == 0)
3086                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3087                                         ME_PIPE0_RESET, 0);
3088                 else
3089                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3090                                         ME_PIPE1_RESET, 0);
3091                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3092
3093                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3094                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3095                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3096                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3097         }
3098         soc21_grbm_select(adev, 0, 0, 0, 0);
3099         mutex_unlock(&adev->srbm_mutex);
3100
3101         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3102         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3103         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3104         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3105
3106         /* Invalidate the data caches */
3107         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3108         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3109         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3110
3111         for (i = 0; i < usec_timeout; i++) {
3112                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3113                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3114                         INVALIDATE_DCACHE_COMPLETE))
3115                         break;
3116                 udelay(1);
3117         }
3118
3119         if (i >= usec_timeout) {
3120                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3121                 return -EINVAL;
3122         }
3123
3124         return 0;
3125 }
3126
3127 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3128 {
3129         int r;
3130
3131         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3132                 return -EINVAL;
3133
3134         gfx_v11_0_cp_gfx_enable(adev, false);
3135
3136         if (adev->gfx.rs64_enable)
3137                 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3138         else
3139                 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3140         if (r) {
3141                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3142                 return r;
3143         }
3144
3145         if (adev->gfx.rs64_enable)
3146                 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3147         else
3148                 r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3149         if (r) {
3150                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3151                 return r;
3152         }
3153
3154         return 0;
3155 }
3156
3157 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3158 {
3159         struct amdgpu_ring *ring;
3160         const struct cs_section_def *sect = NULL;
3161         const struct cs_extent_def *ext = NULL;
3162         int r, i;
3163         int ctx_reg_offset;
3164
3165         /* init the CP */
3166         WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3167                      adev->gfx.config.max_hw_contexts - 1);
3168         WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3169
3170         if (!amdgpu_async_gfx_ring)
3171                 gfx_v11_0_cp_gfx_enable(adev, true);
3172
3173         ring = &adev->gfx.gfx_ring[0];
3174         r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3175         if (r) {
3176                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3177                 return r;
3178         }
3179
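             /* emit the clear-state preamble: context register defaults from gfx11_cs_data plus the tile steering override */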
3180         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3181         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3182
3183         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3184         amdgpu_ring_write(ring, 0x80000000);
3185         amdgpu_ring_write(ring, 0x80000000);
3186
3187         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3188                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3189                         if (sect->id == SECT_CONTEXT) {
3190                                 amdgpu_ring_write(ring,
3191                                                   PACKET3(PACKET3_SET_CONTEXT_REG,
3192                                                           ext->reg_count));
3193                                 amdgpu_ring_write(ring, ext->reg_index -
3194                                                   PACKET3_SET_CONTEXT_REG_START);
3195                                 for (i = 0; i < ext->reg_count; i++)
3196                                         amdgpu_ring_write(ring, ext->extent[i]);
3197                         }
3198                 }
3199         }
3200
3201         ctx_reg_offset =
3202                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3203         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3204         amdgpu_ring_write(ring, ctx_reg_offset);
3205         amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3206
3207         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3208         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3209
3210         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3211         amdgpu_ring_write(ring, 0);
3212
3213         amdgpu_ring_commit(ring);
3214
3215         /* submit cs packet to copy state 0 to next available state */
3216         if (adev->gfx.num_gfx_rings > 1) {
3217                 /* maximum supported gfx ring is 2 */
3218                 ring = &adev->gfx.gfx_ring[1];
3219                 r = amdgpu_ring_alloc(ring, 2);
3220                 if (r) {
3221                         DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3222                         return r;
3223                 }
3224
3225                 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3226                 amdgpu_ring_write(ring, 0);
3227
3228                 amdgpu_ring_commit(ring);
3229         }
3230         return 0;
3231 }
3232
3233 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3234                                          CP_PIPE_ID pipe)
3235 {
3236         u32 tmp;
3237
3238         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3239         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3240
3241         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3242 }
3243
3244 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3245                                           struct amdgpu_ring *ring)
3246 {
3247         u32 tmp;
3248
3249         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3250         if (ring->use_doorbell) {
3251                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3252                                     DOORBELL_OFFSET, ring->doorbell_index);
3253                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3254                                     DOORBELL_EN, 1);
3255         } else {
3256                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3257                                     DOORBELL_EN, 0);
3258         }
3259         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3260
3261         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3262                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
3263         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3264
3265         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3266                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3267 }
3268
3269 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3270 {
3271         struct amdgpu_ring *ring;
3272         u32 tmp;
3273         u32 rb_bufsz;
3274         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3275
3276         /* Set the write pointer delay */
3277         WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3278
3279         /* set the RB to use vmid 0 */
3280         WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3281
3282         /* Init gfx ring 0 for pipe 0 */
3283         mutex_lock(&adev->srbm_mutex);
3284         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3285
3286         /* Set ring buffer size */
3287         ring = &adev->gfx.gfx_ring[0];
3288         rb_bufsz = order_base_2(ring->ring_size / 8);
3289         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3290         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3291         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3292
3293         /* Initialize the ring buffer's write pointers */
3294         ring->wptr = 0;
3295         WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3296         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3297
3298         /* set the wb address whether it's enabled or not */
3299         rptr_addr = ring->rptr_gpu_addr;
3300         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3301         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3302                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3303
3304         wptr_gpu_addr = ring->wptr_gpu_addr;
3305         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3306                      lower_32_bits(wptr_gpu_addr));
3307         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3308                      upper_32_bits(wptr_gpu_addr));
3309
3310         mdelay(1);
3311         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3312
3313         rb_addr = ring->gpu_addr >> 8;
3314         WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3315         WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3316
3317         WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3318
3319         gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3320         mutex_unlock(&adev->srbm_mutex);
3321
3322         /* Init gfx ring 1 for pipe 1 */
3323         if (adev->gfx.num_gfx_rings > 1) {
3324                 mutex_lock(&adev->srbm_mutex);
3325                 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3326                 /* maximum supported gfx ring is 2 */
3327                 ring = &adev->gfx.gfx_ring[1];
3328                 rb_bufsz = order_base_2(ring->ring_size / 8);
3329                 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3330                 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3331                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3332                 /* Initialize the ring buffer's write pointers */
3333                 ring->wptr = 0;
3334                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3335                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3336                 /* Set the wb address whether it's enabled or not */
3337                 rptr_addr = ring->rptr_gpu_addr;
3338                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3339                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3340                              CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3341                 wptr_gpu_addr = ring->wptr_gpu_addr;
3342                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3343                              lower_32_bits(wptr_gpu_addr));
3344                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3345                              upper_32_bits(wptr_gpu_addr));
3346
3347                 mdelay(1);
3348                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3349
3350                 rb_addr = ring->gpu_addr >> 8;
3351                 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3352                 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3353                 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3354
3355                 gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3356                 mutex_unlock(&adev->srbm_mutex);
3357         }
3358         /* Switch to pipe 0 */
3359         mutex_lock(&adev->srbm_mutex);
3360         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3361         mutex_unlock(&adev->srbm_mutex);
3362
3363         /* start the ring */
3364         gfx_v11_0_cp_gfx_start(adev);
3365
3366         return 0;
3367 }
3368
3369 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3370 {
3371         u32 data;
3372
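             /* the RS64 MEC exposes per-pipe reset/active bits; the older MEC only has per-ME halt bits */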
3373         if (adev->gfx.rs64_enable) {
3374                 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3375                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3376                                                          enable ? 0 : 1);
3377                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3378                                                          enable ? 0 : 1);
3379                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3380                                                          enable ? 0 : 1);
3381                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3382                                                          enable ? 0 : 1);
3383                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3384                                                          enable ? 0 : 1);
3385                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3386                                                          enable ? 1 : 0);
3387                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3388                                                          enable ? 1 : 0);
3389                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3390                                                          enable ? 1 : 0);
3391                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3392                                                          enable ? 1 : 0);
3393                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3394                                                          enable ? 0 : 1);
3395                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3396         } else {
3397                 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3398
3399                 if (enable) {
3400                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3401                         if (!adev->enable_mes_kiq)
3402                                 data = REG_SET_FIELD(data, CP_MEC_CNTL,
3403                                                      MEC_ME2_HALT, 0);
3404                 } else {
3405                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3406                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3407                 }
3408                 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3409         }
3410
3411         udelay(50);
3412 }
3413
3414 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3415 {
3416         const struct gfx_firmware_header_v1_0 *mec_hdr;
3417         const __le32 *fw_data;
3418         unsigned i, fw_size;
3419         u32 *fw = NULL;
3420         int r;
3421
3422         if (!adev->gfx.mec_fw)
3423                 return -EINVAL;
3424
3425         gfx_v11_0_cp_compute_enable(adev, false);
3426
3427         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3428         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3429
3430         fw_data = (const __le32 *)
3431                 (adev->gfx.mec_fw->data +
3432                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3433         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3434
3435         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3436                                           PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3437                                           &adev->gfx.mec.mec_fw_obj,
3438                                           &adev->gfx.mec.mec_fw_gpu_addr,
3439                                           (void **)&fw);
3440         if (r) {
3441                 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3442                 gfx_v11_0_mec_fini(adev);
3443                 return r;
3444         }
3445
3446         memcpy(fw, fw_data, fw_size);
3447
3448         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3449         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3450
3451         gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3452
3453         /* MEC1 */
3454         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3455
3456         for (i = 0; i < mec_hdr->jt_size; i++)
3457                 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3458                              le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3459
3460         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3461
3462         return 0;
3463 }
3464
3465 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3466 {
3467         const struct gfx_firmware_header_v2_0 *mec_hdr;
3468         const __le32 *fw_ucode, *fw_data;
3469         u32 tmp, fw_ucode_size, fw_data_size;
3470         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3471         u32 *fw_ucode_ptr, *fw_data_ptr;
3472         int r;
3473
3474         if (!adev->gfx.mec_fw)
3475                 return -EINVAL;
3476
3477         gfx_v11_0_cp_compute_enable(adev, false);
3478
3479         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3480         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3481
3482         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3483                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
3484         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3485
3486         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3487                                 le32_to_cpu(mec_hdr->data_offset_bytes));
3488         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3489
3490         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3491                                       64 * 1024,
3492                                       AMDGPU_GEM_DOMAIN_VRAM |
3493                                       AMDGPU_GEM_DOMAIN_GTT,
3494                                       &adev->gfx.mec.mec_fw_obj,
3495                                       &adev->gfx.mec.mec_fw_gpu_addr,
3496                                       (void **)&fw_ucode_ptr);
3497         if (r) {
3498                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3499                 gfx_v11_0_mec_fini(adev);
3500                 return r;
3501         }
3502
3503         r = amdgpu_bo_create_reserved(adev, fw_data_size,
3504                                       64 * 1024,
3505                                       AMDGPU_GEM_DOMAIN_VRAM |
3506                                       AMDGPU_GEM_DOMAIN_GTT,
3507                                       &adev->gfx.mec.mec_fw_data_obj,
3508                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
3509                                       (void **)&fw_data_ptr);
3510         if (r) {
3511                 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3512                 gfx_v11_0_mec_fini(adev);
3513                 return r;
3514         }
3515
3516         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3517         memcpy(fw_data_ptr, fw_data, fw_data_size);
3518
3519         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3520         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3521         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3522         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3523
3524         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3525         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3526         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3527         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3528         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3529
3530         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3531         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3532         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3533         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3534
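             /* for each MEC pipe: program the data/instruction cache bases and the ucode start address */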
3535         mutex_lock(&adev->srbm_mutex);
3536         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3537                 soc21_grbm_select(adev, 1, i, 0, 0);
3538
3539                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3540                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3541                      upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3542
3543                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3544                                         mec_hdr->ucode_start_addr_lo >> 2 |
3545                                         mec_hdr->ucode_start_addr_hi << 30);
3546                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3547                                         mec_hdr->ucode_start_addr_hi >> 2);
3548
3549                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3550                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3551                      upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3552         }
3553         mutex_unlock(&adev->srbm_mutex);
3554         soc21_grbm_select(adev, 0, 0, 0, 0);
3555
3556         /* Trigger an invalidation of the L1 data cache */
3557         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3558         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3559         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3560
3561         /* Wait for invalidation complete */
3562         for (i = 0; i < usec_timeout; i++) {
3563                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3564                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3565                                        INVALIDATE_DCACHE_COMPLETE))
3566                         break;
3567                 udelay(1);
3568         }
3569
3570         if (i >= usec_timeout) {
3571                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3572                 return -EINVAL;
3573         }
3574
3575         /* Trigger an invalidation of the L1 instruction caches */
3576         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3577         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3578         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3579
3580         /* Wait for invalidation complete */
3581         for (i = 0; i < usec_timeout; i++) {
3582                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3583                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3584                                        INVALIDATE_CACHE_COMPLETE))
3585                         break;
3586                 udelay(1);
3587         }
3588
3589         if (i >= usec_timeout) {
3590                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
3591                 return -EINVAL;
3592         }
3593
3594         return 0;
3595 }
3596
3597 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3598 {
3599         uint32_t tmp;
3600         struct amdgpu_device *adev = ring->adev;
3601
3602         /* tell RLC which queue is the KIQ */
3603         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3604         tmp &= 0xffffff00;
3605         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3606         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3607         tmp |= 0x80;
3608         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3609 }
3610
3611 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3612 {
3613         /* set graphics engine doorbell range */
3614         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3615                      (adev->doorbell_index.gfx_ring0 * 2) << 2);
3616         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3617                      (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3618
3619         /* set compute engine doorbell range */
3620         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3621                      (adev->doorbell_index.kiq * 2) << 2);
3622         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3623                      (adev->doorbell_index.userqueue_end * 2) << 2);
3624 }
3625
3626 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3627                                   struct amdgpu_mqd_prop *prop)
3628 {
3629         struct v11_gfx_mqd *mqd = m;
3630         uint64_t hqd_gpu_addr, wb_gpu_addr;
3631         uint32_t tmp;
3632         uint32_t rb_bufsz;
3633
3634         /* set up gfx hqd wptr */
3635         mqd->cp_gfx_hqd_wptr = 0;
3636         mqd->cp_gfx_hqd_wptr_hi = 0;
3637
3638         /* set the pointer to the MQD */
3639         mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3640         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3641
3642         /* set up mqd control */
3643         tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3644         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3645         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3646         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3647         mqd->cp_gfx_mqd_control = tmp;
3648
3649         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3650         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3651         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3652         mqd->cp_gfx_hqd_vmid = 0;
3653
3654         /* set up default queue priority level
3655          * 0x0 = low priority, 0x1 = high priority */
3656         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3657         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3658         mqd->cp_gfx_hqd_queue_priority = tmp;
3659
3660         /* set up time quantum */
3661         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3662         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3663         mqd->cp_gfx_hqd_quantum = tmp;
3664
3665         /* set up gfx hqd base. this is similar to CP_RB_BASE */
3666         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3667         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3668         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3669
3670         /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
3671         wb_gpu_addr = prop->rptr_gpu_addr;
3672         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3673         mqd->cp_gfx_hqd_rptr_addr_hi =
3674                 upper_32_bits(wb_gpu_addr) & 0xffff;
3675
3676         /* set up rb_wptr_poll addr */
3677         wb_gpu_addr = prop->wptr_gpu_addr;
3678         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3679         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3680
3681         /* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
3682         rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3683         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3684         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3685         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3686 #ifdef __BIG_ENDIAN
3687         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3688 #endif
3689         mqd->cp_gfx_hqd_cntl = tmp;
3690
3691         /* set up cp_doorbell_control */
3692         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3693         if (prop->use_doorbell) {
3694                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3695                                     DOORBELL_OFFSET, prop->doorbell_index);
3696                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3697                                     DOORBELL_EN, 1);
3698         } else
3699                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3700                                     DOORBELL_EN, 0);
3701         mqd->cp_rb_doorbell_control = tmp;
3702
3703         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3704         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3705
3706         /* activate the queue */
3707         mqd->cp_gfx_hqd_active = 1;
3708
3709         return 0;
3710 }
3711
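     /* Initialize a kernel gfx queue's MQD (memory queue descriptor).  On a
      * fresh init the MQD is built under srbm_mutex with the ring's
      * me/pipe/queue selected and a backup copy is saved; on reset/resume the
      * backup is restored and the ring buffer is cleared instead.
      */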
3712 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3713 {
3714         struct amdgpu_device *adev = ring->adev;
3715         struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3716         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3717
3718         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3719                 memset((void *)mqd, 0, sizeof(*mqd));
3720                 mutex_lock(&adev->srbm_mutex);
3721                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3722                 amdgpu_ring_init_mqd(ring);
3723                 soc21_grbm_select(adev, 0, 0, 0, 0);
3724                 mutex_unlock(&adev->srbm_mutex);
3725                 if (adev->gfx.me.mqd_backup[mqd_idx])
3726                         memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3727         } else {
3728                 /* restore mqd with the backup copy */
3729                 if (adev->gfx.me.mqd_backup[mqd_idx])
3730                         memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3731                 /* reset the ring */
3732                 ring->wptr = 0;
3733                 *ring->wptr_cpu_addr = 0;
3734                 amdgpu_ring_clear_ring(ring);
3735         }
3736
3737         return 0;
3738 }
3739
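     /* Resume the kernel gfx queues: map each ring's MQD BO, (re)build its
      * MQD, enable the kernel gfx queues (KGQ) and then start the CP gfx
      * engine.
      */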
3740 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3741 {
3742         int r, i;
3743         struct amdgpu_ring *ring;
3744
3745         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3746                 ring = &adev->gfx.gfx_ring[i];
3747
3748                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3749                 if (unlikely(r != 0))
3750                         return r;
3751
3752                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3753                 if (!r) {
3754                         r = gfx_v11_0_gfx_init_queue(ring);
3755                         amdgpu_bo_kunmap(ring->mqd_obj);
3756                         ring->mqd_ptr = NULL;
3757                 }
3758                 amdgpu_bo_unreserve(ring->mqd_obj);
3759                 if (r)
3760                         return r;
3761         }
3762
3763         r = amdgpu_gfx_enable_kgq(adev, 0);
3764         if (r)
3765                 return r;
3766
3767         return gfx_v11_0_cp_gfx_start(adev);
3768 }
3769
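     /* Fill a v11_compute_mqd from the generic amdgpu_mqd_prop: EOP buffer
      * address, doorbell setup, MQD/HQD base addresses, rptr/wptr writeback
      * addresses and the static pipe/queue priorities.  Register defaults are
      * read back with RREG32_SOC15(), adjusted with REG_SET_FIELD() and
      * cached in the MQD.
      */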
3770 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3771                                       struct amdgpu_mqd_prop *prop)
3772 {
3773         struct v11_compute_mqd *mqd = m;
3774         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3775         uint32_t tmp;
3776
3777         mqd->header = 0xC0310800;
3778         mqd->compute_pipelinestat_enable = 0x00000001;
3779         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3780         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3781         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3782         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3783         mqd->compute_misc_reserved = 0x00000007;
3784
3785         eop_base_addr = prop->eop_gpu_addr >> 8;
3786         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3787         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3788
3789         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3790         tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3791         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3792                         (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3793
3794         mqd->cp_hqd_eop_control = tmp;
3795
3796         /* enable doorbell? */
3797         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3798
3799         if (prop->use_doorbell) {
3800                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3801                                     DOORBELL_OFFSET, prop->doorbell_index);
3802                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3803                                     DOORBELL_EN, 1);
3804                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3805                                     DOORBELL_SOURCE, 0);
3806                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3807                                     DOORBELL_HIT, 0);
3808         } else {
3809                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3810                                     DOORBELL_EN, 0);
3811         }
3812
3813         mqd->cp_hqd_pq_doorbell_control = tmp;
3814
3815         /* disable the queue if it's active */
3816         mqd->cp_hqd_dequeue_request = 0;
3817         mqd->cp_hqd_pq_rptr = 0;
3818         mqd->cp_hqd_pq_wptr_lo = 0;
3819         mqd->cp_hqd_pq_wptr_hi = 0;
3820
3821         /* set the pointer to the MQD */
3822         mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3823         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3824
3825         /* set MQD vmid to 0 */
3826         tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3827         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3828         mqd->cp_mqd_control = tmp;
3829
3830         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3831         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3832         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3833         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3834
3835         /* set up the HQD, this is similar to CP_RB0_CNTL */
3836         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3837         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3838                             (order_base_2(prop->queue_size / 4) - 1));
3839         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3840                             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3841         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3842         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3843         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3844         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3845         mqd->cp_hqd_pq_control = tmp;
3846
3847         /* set the wb address whether it's enabled or not */
3848         wb_gpu_addr = prop->rptr_gpu_addr;
3849         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3850         mqd->cp_hqd_pq_rptr_report_addr_hi =
3851                 upper_32_bits(wb_gpu_addr) & 0xffff;
3852
3853         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3854         wb_gpu_addr = prop->wptr_gpu_addr;
3855         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3856         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3857
3858         tmp = 0;
3859         /* enable the doorbell if requested */
3860         if (prop->use_doorbell) {
3861                 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3862                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3863                                 DOORBELL_OFFSET, prop->doorbell_index);
3864
3865                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3866                                     DOORBELL_EN, 1);
3867                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3868                                     DOORBELL_SOURCE, 0);
3869                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3870                                     DOORBELL_HIT, 0);
3871         }
3872
3873         mqd->cp_hqd_pq_doorbell_control = tmp;
3874
3875         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3876         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3877
3878         /* set the vmid for the queue */
3879         mqd->cp_hqd_vmid = 0;
3880
3881         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3882         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3883         mqd->cp_hqd_persistent_state = tmp;
3884
3885         /* set MIN_IB_AVAIL_SIZE */
3886         tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3887         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3888         mqd->cp_hqd_ib_control = tmp;
3889
3890         /* set static priority for a compute queue/ring */
3891         mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3892         mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3893
3894         mqd->cp_hqd_active = prop->hqd_active;
3895
3896         return 0;
3897 }
3898
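     /* Program the compute HQD registers directly from the MQD contents.
      * This path is only used for the KIQ ring; regular compute queues are
      * mapped via the KIQ/MES.  The caller holds srbm_mutex with the ring's
      * me/pipe/queue selected through soc21_grbm_select().
      */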
3899 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
3900 {
3901         struct amdgpu_device *adev = ring->adev;
3902         struct v11_compute_mqd *mqd = ring->mqd_ptr;
3903         int j;
3904
3905         /* deactivate the queue */
3906         if (amdgpu_sriov_vf(adev))
3907                 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3908
3909         /* disable wptr polling */
3910         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3911
3912         /* write the EOP addr */
3913         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3914                mqd->cp_hqd_eop_base_addr_lo);
3915         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3916                mqd->cp_hqd_eop_base_addr_hi);
3917
3918         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3919         WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3920                mqd->cp_hqd_eop_control);
3921
3922         /* enable doorbell? */
3923         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3924                mqd->cp_hqd_pq_doorbell_control);
3925
3926         /* disable the queue if it's active */
3927         if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3928                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3929                 for (j = 0; j < adev->usec_timeout; j++) {
3930                         if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3931                                 break;
3932                         udelay(1);
3933                 }
3934                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3935                        mqd->cp_hqd_dequeue_request);
3936                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3937                        mqd->cp_hqd_pq_rptr);
3938                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3939                        mqd->cp_hqd_pq_wptr_lo);
3940                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3941                        mqd->cp_hqd_pq_wptr_hi);
3942         }
3943
3944         /* set the pointer to the MQD */
3945         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3946                mqd->cp_mqd_base_addr_lo);
3947         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3948                mqd->cp_mqd_base_addr_hi);
3949
3950         /* set MQD vmid to 0 */
3951         WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3952                mqd->cp_mqd_control);
3953
3954         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3955         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3956                mqd->cp_hqd_pq_base_lo);
3957         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3958                mqd->cp_hqd_pq_base_hi);
3959
3960         /* set up the HQD, this is similar to CP_RB0_CNTL */
3961         WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3962                mqd->cp_hqd_pq_control);
3963
3964         /* set the wb address whether it's enabled or not */
3965         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3966                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3967         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3968                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3969
3970         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3971         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3972                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3973         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3974                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3975
3976         /* enable the doorbell if requested */
3977         if (ring->use_doorbell) {
3978                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3979                         (adev->doorbell_index.kiq * 2) << 2);
3980                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3981                         (adev->doorbell_index.userqueue_end * 2) << 2);
3982         }
3983
3984         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3985                mqd->cp_hqd_pq_doorbell_control);
3986
3987         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3988         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3989                mqd->cp_hqd_pq_wptr_lo);
3990         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3991                mqd->cp_hqd_pq_wptr_hi);
3992
3993         /* set the vmid for the queue */
3994         WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3995
3996         WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3997                mqd->cp_hqd_persistent_state);
3998
3999         /* activate the queue */
4000         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4001                mqd->cp_hqd_active);
4002
4003         if (ring->use_doorbell)
4004                 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4005
4006         return 0;
4007 }
4008
4009 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4010 {
4011         struct amdgpu_device *adev = ring->adev;
4012         struct v11_compute_mqd *mqd = ring->mqd_ptr;
4013
4014         gfx_v11_0_kiq_setting(ring);
4015
4016         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4017                 /* reset MQD to a clean status */
4018                 if (adev->gfx.kiq[0].mqd_backup)
4019                         memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
4020
4021                 /* reset ring buffer */
4022                 ring->wptr = 0;
4023                 amdgpu_ring_clear_ring(ring);
4024
4025                 mutex_lock(&adev->srbm_mutex);
4026                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4027                 gfx_v11_0_kiq_init_register(ring);
4028                 soc21_grbm_select(adev, 0, 0, 0, 0);
4029                 mutex_unlock(&adev->srbm_mutex);
4030         } else {
4031                 memset((void *)mqd, 0, sizeof(*mqd));
4032                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4033                         amdgpu_ring_clear_ring(ring);
4034                 mutex_lock(&adev->srbm_mutex);
4035                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4036                 amdgpu_ring_init_mqd(ring);
4037                 gfx_v11_0_kiq_init_register(ring);
4038                 soc21_grbm_select(adev, 0, 0, 0, 0);
4039                 mutex_unlock(&adev->srbm_mutex);
4040
4041                 if (adev->gfx.kiq[0].mqd_backup)
4042                         memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
4043         }
4044
4045         return 0;
4046 }
4047
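     /* Initialize a kernel compute queue's MQD; mirrors
      * gfx_v11_0_gfx_init_queue() but keeps its backup copy in
      * adev->gfx.mec.mqd_backup[].
      */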
4048 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4049 {
4050         struct amdgpu_device *adev = ring->adev;
4051         struct v11_compute_mqd *mqd = ring->mqd_ptr;
4052         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4053
4054         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4055                 memset((void *)mqd, 0, sizeof(*mqd));
4056                 mutex_lock(&adev->srbm_mutex);
4057                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4058                 amdgpu_ring_init_mqd(ring);
4059                 soc21_grbm_select(adev, 0, 0, 0, 0);
4060                 mutex_unlock(&adev->srbm_mutex);
4061
4062                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4063                         memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4064         } else {
4065                 /* restore MQD to a clean status */
4066                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4067                         memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4068                 /* reset ring buffer */
4069                 ring->wptr = 0;
4070                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4071                 amdgpu_ring_clear_ring(ring);
4072         }
4073
4074         return 0;
4075 }
4076
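     /* Bring up the KIQ (kernel interface queue) ring: map its MQD BO,
      * build the MQD, program the HQD registers and mark the ring ready.
      */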
4077 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4078 {
4079         struct amdgpu_ring *ring;
4080         int r;
4081
4082         ring = &adev->gfx.kiq[0].ring;
4083
4084         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4085         if (unlikely(r != 0))
4086                 return r;
4087
4088         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4089         if (unlikely(r != 0)) {
4090                 amdgpu_bo_unreserve(ring->mqd_obj);
4091                 return r;
4092         }
4093
4094         gfx_v11_0_kiq_init_queue(ring);
4095         amdgpu_bo_kunmap(ring->mqd_obj);
4096         ring->mqd_ptr = NULL;
4097         amdgpu_bo_unreserve(ring->mqd_obj);
4098         ring->sched.ready = true;
4099         return 0;
4100 }
4101
4102 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4103 {
4104         struct amdgpu_ring *ring = NULL;
4105         int r = 0, i;
4106
4107         if (!amdgpu_async_gfx_ring)
4108                 gfx_v11_0_cp_compute_enable(adev, true);
4109
4110         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4111                 ring = &adev->gfx.compute_ring[i];
4112
4113                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4114                 if (unlikely(r != 0))
4115                         goto done;
4116                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4117                 if (!r) {
4118                         r = gfx_v11_0_kcq_init_queue(ring);
4119                         amdgpu_bo_kunmap(ring->mqd_obj);
4120                         ring->mqd_ptr = NULL;
4121                 }
4122                 amdgpu_bo_unreserve(ring->mqd_obj);
4123                 if (r)
4124                         goto done;
4125         }
4126
4127         r = amdgpu_gfx_enable_kcq(adev, 0);
4128 done:
4129         return r;
4130 }
4131
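     /* Top-level CP bring-up: load CP microcode when using direct loading,
      * set the doorbell aperture, resume the KIQ (or the MES KIQ), the
      * compute queues and the gfx queues, then run a ring test on every
      * ring.
      */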
4132 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4133 {
4134         int r, i;
4135         struct amdgpu_ring *ring;
4136
4137         if (!(adev->flags & AMD_IS_APU))
4138                 gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4139
4140         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4141                 /* legacy firmware loading */
4142                 r = gfx_v11_0_cp_gfx_load_microcode(adev);
4143                 if (r)
4144                         return r;
4145
4146                 if (adev->gfx.rs64_enable)
4147                         r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4148                 else
4149                         r = gfx_v11_0_cp_compute_load_microcode(adev);
4150                 if (r)
4151                         return r;
4152         }
4153
4154         gfx_v11_0_cp_set_doorbell_range(adev);
4155
4156         if (amdgpu_async_gfx_ring) {
4157                 gfx_v11_0_cp_compute_enable(adev, true);
4158                 gfx_v11_0_cp_gfx_enable(adev, true);
4159         }
4160
4161         if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4162                 r = amdgpu_mes_kiq_hw_init(adev);
4163         else
4164                 r = gfx_v11_0_kiq_resume(adev);
4165         if (r)
4166                 return r;
4167
4168         r = gfx_v11_0_kcq_resume(adev);
4169         if (r)
4170                 return r;
4171
4172         if (!amdgpu_async_gfx_ring) {
4173                 r = gfx_v11_0_cp_gfx_resume(adev);
4174                 if (r)
4175                         return r;
4176         } else {
4177                 r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4178                 if (r)
4179                         return r;
4180         }
4181
4182         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4183                 ring = &adev->gfx.gfx_ring[i];
4184                 r = amdgpu_ring_test_helper(ring);
4185                 if (r)
4186                         return r;
4187         }
4188
4189         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4190                 ring = &adev->gfx.compute_ring[i];
4191                 r = amdgpu_ring_test_helper(ring);
4192                 if (r)
4193                         return r;
4194         }
4195
4196         return 0;
4197 }
4198
4199 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4200 {
4201         gfx_v11_0_cp_gfx_enable(adev, enable);
4202         gfx_v11_0_cp_compute_enable(adev, enable);
4203 }
4204
4205 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4206 {
4207         int r;
4208         bool value;
4209
4210         r = adev->gfxhub.funcs->gart_enable(adev);
4211         if (r)
4212                 return r;
4213
4214         adev->hdp.funcs->flush_hdp(adev, NULL);
4215
4216         value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4217                 false : true;
4218
4219         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4220         amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4221
4222         return 0;
4223 }
4224
4225 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4226 {
4227         u32 tmp;
4228
4229         /* select RS64 */
4230         if (adev->gfx.rs64_enable) {
4231                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4232                 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4233                 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4234
4235                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4236                 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4237                 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4238         }
4239
4240         if (amdgpu_emu_mode == 1)
4241                 msleep(100);
4242 }
4243
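     /* Cache GB_ADDR_CONFIG and decode its fields (pipes, packers, RBs per
      * SE, shader engines, pipe interleave size) into adev->gfx.config.
      */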
4244 static int get_gb_addr_config(struct amdgpu_device *adev)
4245 {
4246         u32 gb_addr_config;
4247
4248         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4249         if (gb_addr_config == 0)
4250                 return -EINVAL;
4251
4252         adev->gfx.config.gb_addr_config_fields.num_pkrs =
4253                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4254
4255         adev->gfx.config.gb_addr_config = gb_addr_config;
4256
4257         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4258                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4259                                       GB_ADDR_CONFIG, NUM_PIPES);
4260
4261         adev->gfx.config.max_tile_pipes =
4262                 adev->gfx.config.gb_addr_config_fields.num_pipes;
4263
4264         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4265                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4266                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4267         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4268                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4269                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
4270         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4271                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4272                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4273         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4274                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4275                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4276
4277         return 0;
4278 }
4279
4280 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4281 {
4282         uint32_t data;
4283
4284         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4285         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4286         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4287
4288         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4289         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4290         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4291 }
4292
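     /* IP block hw_init: load/program the RLC (via IMU backdoor autoload,
      * PSP or direct loading depending on load_type), enable the gfxhub,
      * apply golden registers and constants, resume the RLC and finally
      * bring up the CP rings.
      */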
4293 static int gfx_v11_0_hw_init(void *handle)
4294 {
4295         int r;
4296         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4297
4298         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4299                 if (adev->gfx.imu.funcs) {
4300                         /* RLC autoload sequence 1: Program rlc ram */
4301                         if (adev->gfx.imu.funcs->program_rlc_ram)
4302                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
4303                 }
4304                 /* rlc autoload firmware */
4305                 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4306                 if (r)
4307                         return r;
4308         } else {
4309                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4310                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4311                                 if (adev->gfx.imu.funcs->load_microcode)
4312                                         adev->gfx.imu.funcs->load_microcode(adev);
4313                                 if (adev->gfx.imu.funcs->setup_imu)
4314                                         adev->gfx.imu.funcs->setup_imu(adev);
4315                                 if (adev->gfx.imu.funcs->start_imu)
4316                                         adev->gfx.imu.funcs->start_imu(adev);
4317                         }
4318
4319                         /* disable gpa mode in backdoor loading */
4320                         gfx_v11_0_disable_gpa_mode(adev);
4321                 }
4322         }
4323
4324         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4325             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4326                 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4327                 if (r) {
4328                         dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4329                         return r;
4330                 }
4331         }
4332
4333         adev->gfx.is_poweron = true;
4334
4335         if (get_gb_addr_config(adev))
4336                 DRM_WARN("Invalid gb_addr_config!\n");
4337
4338         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4339             adev->gfx.rs64_enable)
4340                 gfx_v11_0_config_gfx_rs64(adev);
4341
4342         r = gfx_v11_0_gfxhub_enable(adev);
4343         if (r)
4344                 return r;
4345
4346         if (!amdgpu_emu_mode)
4347                 gfx_v11_0_init_golden_registers(adev);
4348
4349         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4350             (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4351                 /*
4352                  * For gfx 11, RLC firmware loading relies on the SMU firmware
4353                  * being loaded first, so for direct loading the SMC ucode has
4354                  * to be loaded here before the RLC.
4355                  */
4356                 if (!(adev->flags & AMD_IS_APU)) {
4357                         r = amdgpu_pm_load_smu_firmware(adev, NULL);
4358                         if (r)
4359                                 return r;
4360                 }
4361         }
4362
4363         gfx_v11_0_constants_init(adev);
4364
4365         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4366                 gfx_v11_0_select_cp_fw_arch(adev);
4367
4368         if (adev->nbio.funcs->gc_doorbell_init)
4369                 adev->nbio.funcs->gc_doorbell_init(adev);
4370
4371         r = gfx_v11_0_rlc_resume(adev);
4372         if (r)
4373                 return r;
4374
4375         /*
4376          * golden register init and RLC resume may override some registers,
4377          * so reconfigure them here
4378          */
4379         gfx_v11_0_tcp_harvest(adev);
4380
4381         r = gfx_v11_0_cp_resume(adev);
4382         if (r)
4383                 return r;
4384
4385         /* get IMU version from HW if it's not set */
4386         if (!adev->gfx.imu_fw_version)
4387                 adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
4388
4389         return r;
4390 }
4391
4392 static int gfx_v11_0_hw_fini(void *handle)
4393 {
4394         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4395
4396         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4397         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4398
4399         if (!adev->no_hw_access) {
4400                 if (amdgpu_async_gfx_ring) {
4401                         if (amdgpu_gfx_disable_kgq(adev, 0))
4402                                 DRM_ERROR("KGQ disable failed\n");
4403                 }
4404
4405                 if (amdgpu_gfx_disable_kcq(adev, 0))
4406                         DRM_ERROR("KCQ disable failed\n");
4407
4408                 amdgpu_mes_kiq_hw_fini(adev);
4409         }
4410
4411         if (amdgpu_sriov_vf(adev))
4412                 /* Skip the steps that disable CPG and clear the KIQ position,
4413                  * so that the CP can perform IDLE-SAVE during the switch. Those
4414                  * steps are needed to avoid a DMAR error on gfx9, but the error
4415                  * is not reproduced on gfx11.
4416                  */
4417                 return 0;
4418
4419         gfx_v11_0_cp_enable(adev, false);
4420         gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4421
4422         adev->gfxhub.funcs->gart_disable(adev);
4423
4424         adev->gfx.is_poweron = false;
4425
4426         return 0;
4427 }
4428
4429 static int gfx_v11_0_suspend(void *handle)
4430 {
4431         return gfx_v11_0_hw_fini(handle);
4432 }
4433
4434 static int gfx_v11_0_resume(void *handle)
4435 {
4436         return gfx_v11_0_hw_init(handle);
4437 }
4438
4439 static bool gfx_v11_0_is_idle(void *handle)
4440 {
4441         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4442
4443         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4444                                 GRBM_STATUS, GUI_ACTIVE))
4445                 return false;
4446         else
4447                 return true;
4448 }
4449
4450 static int gfx_v11_0_wait_for_idle(void *handle)
4451 {
4452         unsigned i;
4453         u32 tmp;
4454         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4455
4456         for (i = 0; i < adev->usec_timeout; i++) {
4457                 /* read GRBM_STATUS */
4458                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4459                         GRBM_STATUS__GUI_ACTIVE_MASK;
4460
4461                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4462                         return 0;
4463                 udelay(1);
4464         }
4465         return -ETIMEDOUT;
4466 }
4467
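     /* GFX soft reset sequence: mask the CP interrupts, enter RLC safe mode,
      * request a dequeue on every compute and gfx queue, write CP_VMID_RESET
      * (all VMIDs except 0), pulse the CP/GFX bits in GRBM_SOFT_RESET,
      * re-enable the CP and interrupts, then resume the CP rings.
      */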
4468 static int gfx_v11_0_soft_reset(void *handle)
4469 {
4470         u32 grbm_soft_reset = 0;
4471         u32 tmp;
4472         int i, j, k;
4473         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4474
4475         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4476         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4477         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4478         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4479         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4480         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4481
4482         gfx_v11_0_set_safe_mode(adev, 0);
4483
4484         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4485                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4486                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4487                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4488                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4489                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4490                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4491                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4492
4493                                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4494                                 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4495                         }
4496                 }
4497         }
4498         for (i = 0; i < adev->gfx.me.num_me; ++i) {
4499                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4500                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4501                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4502                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4503                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4504                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4505                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4506
4507                                 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4508                         }
4509                 }
4510         }
4511
4512         WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4513
4514         /* Read the CP_VMID_RESET register three times to give
4515          * GFX_HQD_ACTIVE sufficient time to reach 0. */
4516         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4517         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4518         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4519
4520         for (i = 0; i < adev->usec_timeout; i++) {
4521                 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4522                     !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4523                         break;
4524                 udelay(1);
4525         }
4526         if (i >= adev->usec_timeout) {
4527                 printk("Failed to wait for all pipes clean\n");
4528                 return -EINVAL;
4529         }
4530
4531         /**********  trigger soft reset  ***********/
4532         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4533         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4534                                         SOFT_RESET_CP, 1);
4535         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4536                                         SOFT_RESET_GFX, 1);
4537         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4538                                         SOFT_RESET_CPF, 1);
4539         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4540                                         SOFT_RESET_CPC, 1);
4541         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4542                                         SOFT_RESET_CPG, 1);
4543         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4544         /**********  exit soft reset  ***********/
4545         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4546         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4547                                         SOFT_RESET_CP, 0);
4548         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4549                                         SOFT_RESET_GFX, 0);
4550         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4551                                         SOFT_RESET_CPF, 0);
4552         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4553                                         SOFT_RESET_CPC, 0);
4554         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4555                                         SOFT_RESET_CPG, 0);
4556         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4557
4558         tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4559         tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4560         WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4561
4562         WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4563         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4564
4565         for (i = 0; i < adev->usec_timeout; i++) {
4566                 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4567                         break;
4568                 udelay(1);
4569         }
4570         if (i >= adev->usec_timeout) {
4571                 printk("Failed to wait for CP_VMID_RESET to reach 0\n");
4572                 return -EINVAL;
4573         }
4574
4575         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4576         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4577         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4578         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4579         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4580         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4581
4582         gfx_v11_0_unset_safe_mode(adev, 0);
4583
4584         return gfx_v11_0_cp_resume(adev);
4585 }
4586
4587 static bool gfx_v11_0_check_soft_reset(void *handle)
4588 {
4589         int i, r;
4590         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4591         struct amdgpu_ring *ring;
4592         long tmo = msecs_to_jiffies(1000);
4593
4594         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4595                 ring = &adev->gfx.gfx_ring[i];
4596                 r = amdgpu_ring_test_ib(ring, tmo);
4597                 if (r)
4598                         return true;
4599         }
4600
4601         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4602                 ring = &adev->gfx.compute_ring[i];
4603                 r = amdgpu_ring_test_ib(ring, tmo);
4604                 if (r)
4605                         return true;
4606         }
4607
4608         return false;
4609 }
4610
4611 static int gfx_v11_0_post_soft_reset(void *handle)
4612 {
4613         /*
4614          * GFX soft reset impacts MES, so MES needs to be resumed after a GFX soft reset.
4615          */
4616         return amdgpu_mes_resume((struct amdgpu_device *)handle);
4617 }
4618
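     /* Sample the 64-bit GPU clock counter.  The HI half is read before and
      * after the LO half; if it changed, LO is re-read so both halves come
      * from the same rollover period.  SR-IOV VFs use the CP_MES_MTIME
      * counter (with GFXOFF disabled), bare metal uses the SMUIO golden TSC.
      */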
4619 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4620 {
4621         uint64_t clock;
4622         uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
4623
4624         if (amdgpu_sriov_vf(adev)) {
4625                 amdgpu_gfx_off_ctrl(adev, false);
4626                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4627                 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
4628                 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
4629                 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
4630                 if (clock_counter_hi_pre != clock_counter_hi_after)
4631                         clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
4632                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4633                 amdgpu_gfx_off_ctrl(adev, true);
4634         } else {
4635                 preempt_disable();
4636                 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
4637                 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
4638                 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
4639                 if (clock_counter_hi_pre != clock_counter_hi_after)
4640                         clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
4641                 preempt_enable();
4642         }
4643         clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
4644
4645         return clock;
4646 }
4647
4648 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4649                                            uint32_t vmid,
4650                                            uint32_t gds_base, uint32_t gds_size,
4651                                            uint32_t gws_base, uint32_t gws_size,
4652                                            uint32_t oa_base, uint32_t oa_size)
4653 {
4654         struct amdgpu_device *adev = ring->adev;
4655
4656         /* GDS Base */
4657         gfx_v11_0_write_data_to_reg(ring, 0, false,
4658                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4659                                     gds_base);
4660
4661         /* GDS Size */
4662         gfx_v11_0_write_data_to_reg(ring, 0, false,
4663                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4664                                     gds_size);
4665
4666         /* GWS */
4667         gfx_v11_0_write_data_to_reg(ring, 0, false,
4668                                     SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4669                                     gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4670
4671         /* OA */
4672         gfx_v11_0_write_data_to_reg(ring, 0, false,
4673                                     SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4674                                     (1 << (oa_size + oa_base)) - (1 << oa_base));
4675 }
4676
4677 static int gfx_v11_0_early_init(void *handle)
4678 {
4679         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4680
4681         adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
4682
4683         adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4684         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4685                                           AMDGPU_MAX_COMPUTE_RINGS);
4686
4687         gfx_v11_0_set_kiq_pm4_funcs(adev);
4688         gfx_v11_0_set_ring_funcs(adev);
4689         gfx_v11_0_set_irq_funcs(adev);
4690         gfx_v11_0_set_gds_init(adev);
4691         gfx_v11_0_set_rlc_funcs(adev);
4692         gfx_v11_0_set_mqd_funcs(adev);
4693         gfx_v11_0_set_imu_funcs(adev);
4694
4695         gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4696
4697         return gfx_v11_0_init_microcode(adev);
4698 }
4699
4700 static int gfx_v11_0_late_init(void *handle)
4701 {
4702         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4703         int r;
4704
4705         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4706         if (r)
4707                 return r;
4708
4709         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4710         if (r)
4711                 return r;
4712
4713         return 0;
4714 }
4715
4716 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4717 {
4718         uint32_t rlc_cntl;
4719
4720         /* check whether the RLC is enabled */
4721         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4722         return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
4723 }
4724
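     /* Request RLC safe mode: write the CMD/MESSAGE handshake into
      * RLC_SAFE_MODE and poll until the RLC acknowledges by clearing CMD.
      */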
4725 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4726 {
4727         uint32_t data;
4728         unsigned i;
4729
4730         data = RLC_SAFE_MODE__CMD_MASK;
4731         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4732
4733         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4734
4735         /* wait for RLC_SAFE_MODE */
4736         for (i = 0; i < adev->usec_timeout; i++) {
4737                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4738                                    RLC_SAFE_MODE, CMD))
4739                         break;
4740                 udelay(1);
4741         }
4742 }
4743
4744 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4745 {
4746         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4747 }
4748
4749 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4750                                       bool enable)
4751 {
4752         uint32_t def, data;
4753
4754         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4755                 return;
4756
4757         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4758
4759         if (enable)
4760                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4761         else
4762                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4763
4764         if (def != data)
4765                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4766 }
4767
4768 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4769                                        bool enable)
4770 {
4771         uint32_t def, data;
4772
4773         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4774                 return;
4775
4776         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4777
4778         if (enable)
4779                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4780         else
4781                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4782
4783         if (def != data)
4784                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4785 }
4786
4787 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4788                                            bool enable)
4789 {
4790         uint32_t def, data;
4791
4792         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4793                 return;
4794
4795         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4796
4797         if (enable)
4798                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4799         else
4800                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4801
4802         if (def != data)
4803                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4804 }
4805
4806 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4807                                                        bool enable)
4808 {
4809         uint32_t data, def;
4810
4811         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4812                 return;
4813
4814         /* It is disabled by HW by default */
4815         if (enable) {
4816                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4817                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4818                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4819
4820                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4821                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4822                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4823
4824                         if (def != data)
4825                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4826                 }
4827         } else {
4828                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4829                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4830
4831                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4832                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4833                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4834
4835                         if (def != data)
4836                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4837                 }
4838         }
4839 }
4840
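     /* Coarse grain clock gating (CGCG/CGLS and their 3D variants): adjust
      * the override bits in RLC_CGTT_MGCG_OVERRIDE, program the CGCG/CGLS FSM
      * thresholds and the CP idle poll count, and toggle the SDMA CGCG
      * interrupt enables.
      */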
4841 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4842                                                        bool enable)
4843 {
4844         uint32_t def, data;
4845
4846         if (!(adev->cg_flags &
4847               (AMD_CG_SUPPORT_GFX_CGCG |
4848               AMD_CG_SUPPORT_GFX_CGLS |
4849               AMD_CG_SUPPORT_GFX_3D_CGCG |
4850               AMD_CG_SUPPORT_GFX_3D_CGLS)))
4851                 return;
4852
4853         if (enable) {
4854                 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4855
4856                 /* unset CGCG override */
4857                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4858                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4859                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4860                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4861                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4862                     adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4863                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4864
4865                 /* update CGCG override bits */
4866                 if (def != data)
4867                         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4868
4869                 /* enable cgcg FSM(0x0000363F) */
4870                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4871
4872                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4873                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4874                         data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4875                                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4876                 }
4877
4878                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4879                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4880                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4881                                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4882                 }
4883
4884                 if (def != data)
4885                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4886
4887                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4888                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4889
4890                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
4891                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
4892                         data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4893                                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4894                 }
4895
4896                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
4897                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
4898                         data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4899                                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4900                 }
4901
4902                 if (def != data)
4903                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4904
4905                 /* set IDLE_POLL_COUNT(0x00900100) */
4906                 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
4907
4908                 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
4909                 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4910                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4911
4912                 if (def != data)
4913                         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
4914
4915                 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4916                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4917                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4918                 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4919                 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4920                 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
4921
4922                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4923                 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4924                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4925
4926                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
4927                 if (adev->sdma.num_instances > 1) {
4928                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4929                         data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4930                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4931                 }
4932         } else {
4933                 /* Program RLC_CGCG_CGLS_CTRL */
4934                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4935
4936                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4937                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4938
4939                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4940                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4941
4942                 if (def != data)
4943                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4944
4945                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4946                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4947
4948                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4949                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4950                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4951                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4952
4953                 if (def != data)
4954                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4955
4956                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4957                 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4958                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4959
4960                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
4961                 if (adev->sdma.num_instances > 1) {
4962                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4963                         data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4964                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4965                 }
4966         }
4967 }
4968
4969 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4970                                             bool enable)
4971 {
4972         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4973
4974         gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
4975
4976         gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
4977
4978         gfx_v11_0_update_repeater_fgcg(adev, enable);
4979
4980         gfx_v11_0_update_sram_fgcg(adev, enable);
4981
4982         gfx_v11_0_update_perf_clk(adev, enable);
4983
4984         if (adev->cg_flags &
4985             (AMD_CG_SUPPORT_GFX_MGCG |
4986              AMD_CG_SUPPORT_GFX_CGLS |
4987              AMD_CG_SUPPORT_GFX_CGCG |
4988              AMD_CG_SUPPORT_GFX_3D_CGCG |
4989              AMD_CG_SUPPORT_GFX_3D_CGLS))
4990                 gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
4991
4992         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4993
4994         return 0;
4995 }
4996
4997 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4998 {
4999         u32 data;
5000
5001         amdgpu_gfx_off_ctrl(adev, false);
5002
5003         data = RREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL);
5004
5005         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5006         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5007
5008         WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5009
5010         amdgpu_gfx_off_ctrl(adev, true);
5011 }
5012
5013 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5014         .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5015         .set_safe_mode = gfx_v11_0_set_safe_mode,
5016         .unset_safe_mode = gfx_v11_0_unset_safe_mode,
5017         .init = gfx_v11_0_rlc_init,
5018         .get_csb_size = gfx_v11_0_get_csb_size,
5019         .get_csb_buffer = gfx_v11_0_get_csb_buffer,
5020         .resume = gfx_v11_0_rlc_resume,
5021         .stop = gfx_v11_0_rlc_stop,
5022         .reset = gfx_v11_0_rlc_reset,
5023         .start = gfx_v11_0_rlc_start,
5024         .update_spm_vmid = gfx_v11_0_update_spm_vmid,
5025 };
5026
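/*
 * Toggle the GFX_POWER_GATING_ENABLE bit in RLC_PG_CNTL.  When enabling,
 * also program the CGPG hysteresis delay (RLC_PG_DELAY_3) on the GC
 * 11.0.1 / 11.0.4 / 11.5.0 variants.
 */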
5027 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5028 {
5029         u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5030
5031         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5032                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5033         else
5034                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5035
5036         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5037
5038         /* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5039         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5040                 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5041                 case IP_VERSION(11, 0, 1):
5042                 case IP_VERSION(11, 0, 4):
5043                 case IP_VERSION(11, 5, 0):
5044                         WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5045                         break;
5046                 default:
5047                         break;
5048                 }
5049         }
5050 }
5051
5052 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5053 {
5054         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5055
5056         gfx_v11_cntl_power_gating(adev, enable);
5057
5058         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5059 }
5060
5061 static int gfx_v11_0_set_powergating_state(void *handle,
5062                                            enum amd_powergating_state state)
5063 {
5064         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5065         bool enable = (state == AMD_PG_STATE_GATE);
5066
5067         if (amdgpu_sriov_vf(adev))
5068                 return 0;
5069
5070         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5071         case IP_VERSION(11, 0, 0):
5072         case IP_VERSION(11, 0, 2):
5073         case IP_VERSION(11, 0, 3):
5074                 amdgpu_gfx_off_ctrl(adev, enable);
5075                 break;
5076         case IP_VERSION(11, 0, 1):
5077         case IP_VERSION(11, 0, 4):
5078         case IP_VERSION(11, 5, 0):
5079                 if (!enable)
5080                         amdgpu_gfx_off_ctrl(adev, false);
5081
5082                 gfx_v11_cntl_pg(adev, enable);
5083
5084                 if (enable)
5085                         amdgpu_gfx_off_ctrl(adev, true);
5086
5087                 break;
5088         default:
5089                 break;
5090         }
5091
5092         return 0;
5093 }
5094
5095 static int gfx_v11_0_set_clockgating_state(void *handle,
5096                                           enum amd_clockgating_state state)
5097 {
5098         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5099
5100         if (amdgpu_sriov_vf(adev))
5101                 return 0;
5102
5103         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5104         case IP_VERSION(11, 0, 0):
5105         case IP_VERSION(11, 0, 1):
5106         case IP_VERSION(11, 0, 2):
5107         case IP_VERSION(11, 0, 3):
5108         case IP_VERSION(11, 0, 4):
5109         case IP_VERSION(11, 5, 0):
5110                 gfx_v11_0_update_gfx_clock_gating(adev,
5111                                 state ==  AMD_CG_STATE_GATE);
5112                 break;
5113         default:
5114                 break;
5115         }
5116
5117         return 0;
5118 }
5119
5120 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5121 {
5122         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5123         int data;
5124
5125         /* AMD_CG_SUPPORT_GFX_MGCG */
5126         data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5127         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5128                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5129
5130         /* AMD_CG_SUPPORT_REPEATER_FGCG */
5131         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5132                 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5133
5134         /* AMD_CG_SUPPORT_GFX_FGCG */
5135         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5136                 *flags |= AMD_CG_SUPPORT_GFX_FGCG;
5137
5138         /* AMD_CG_SUPPORT_GFX_PERF_CLK */
5139         if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5140                 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5141
5142         /* AMD_CG_SUPPORT_GFX_CGCG */
5143         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5144         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5145                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5146
5147         /* AMD_CG_SUPPORT_GFX_CGLS */
5148         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5149                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5150
5151         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5152         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5153         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5154                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5155
5156         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5157         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5158                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5159 }
5160
5161 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5162 {
5163         /* gfx11 is 32bit rptr */
5164         return *(uint32_t *)ring->rptr_cpu_addr;
5165 }
5166
5167 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5168 {
5169         struct amdgpu_device *adev = ring->adev;
5170         u64 wptr;
5171
5172         /* XXX check if swapping is necessary on BE */
5173         if (ring->use_doorbell) {
5174                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5175         } else {
5176                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5177                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5178         }
5179
5180         return wptr;
5181 }
5182
5183 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5184 {
5185         struct amdgpu_device *adev = ring->adev;
5186
5187         if (ring->use_doorbell) {
5188                 /* XXX check if swapping is necessary on BE */
5189                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5190                              ring->wptr);
5191                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5192         } else {
5193                 WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5194                              lower_32_bits(ring->wptr));
5195                 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5196                              upper_32_bits(ring->wptr));
5197         }
5198 }
5199
5200 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5201 {
5202         /* gfx11 hardware is 32bit rptr */
5203         return *(uint32_t *)ring->rptr_cpu_addr;
5204 }
5205
5206 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5207 {
5208         u64 wptr;
5209
5210         /* XXX check if swapping is necessary on BE */
5211         if (ring->use_doorbell)
5212                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5213         else
5214                 BUG();
5215         return wptr;
5216 }
5217
5218 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5219 {
5220         struct amdgpu_device *adev = ring->adev;
5221
5222         /* XXX check if swapping is necessary on BE */
5223         if (ring->use_doorbell) {
5224                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5225                              ring->wptr);
5226                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5227         } else {
5228                 BUG(); /* only DOORBELL method supported on gfx11 now */
5229         }
5230 }
5231
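/*
 * Flush HDP from the ring: pick the per-engine ref/mask bit, then emit a
 * WAIT_REG_MEM that writes the NBIO HDP flush request register and polls
 * the flush done register until the request completes.
 */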
5232 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5233 {
5234         struct amdgpu_device *adev = ring->adev;
5235         u32 ref_and_mask, reg_mem_engine;
5236         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5237
5238         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5239                 switch (ring->me) {
5240                 case 1:
5241                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5242                         break;
5243                 case 2:
5244                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5245                         break;
5246                 default:
5247                         return;
5248                 }
5249                 reg_mem_engine = 0;
5250         } else {
5251                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5252                 reg_mem_engine = 1; /* pfp */
5253         }
5254
5255         gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5256                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5257                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5258                                ref_and_mask, ref_and_mask, 0x20);
5259 }
5260
5261 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5262                                        struct amdgpu_job *job,
5263                                        struct amdgpu_ib *ib,
5264                                        uint32_t flags)
5265 {
5266         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5267         u32 header, control = 0;
5268
5269         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5270
5271         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5272
5273         control |= ib->length_dw | (vmid << 24);
5274
5275         if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5276                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5277
5278                 if (flags & AMDGPU_IB_PREEMPTED)
5279                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5280
5281                 if (vmid)
5282                         gfx_v11_0_ring_emit_de_meta(ring,
5283                                     !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
5284         }
5285
5286         if (ring->is_mes_queue)
5287                 /* inherit vmid from mqd */
5288                 control |= 0x400000;
5289
5290         amdgpu_ring_write(ring, header);
5291         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5292         amdgpu_ring_write(ring,
5293 #ifdef __BIG_ENDIAN
5294                 (2 << 0) |
5295 #endif
5296                 lower_32_bits(ib->gpu_addr));
5297         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5298         amdgpu_ring_write(ring, control);
5299 }
5300
5301 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5302                                            struct amdgpu_job *job,
5303                                            struct amdgpu_ib *ib,
5304                                            uint32_t flags)
5305 {
5306         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5307         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5308
5309         if (ring->is_mes_queue)
5310                 /* inherit vmid from mqd */
5311                 control |= 0x40000000;
5312
5313         /* Currently, there is a high possibility to get wave ID mismatch
5314          * between ME and GDS, leading to a hw deadlock, because ME generates
5315          * different wave IDs than the GDS expects. This situation happens
5316          * randomly when at least 5 compute pipes use GDS ordered append.
5317          * The wave IDs generated by ME are also wrong after suspend/resume.
5318          * Those are probably bugs somewhere else in the kernel driver.
5319          *
5320          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5321          * GDS to 0 for this ring (me/pipe).
5322          */
5323         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5324                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5325                 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5326                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5327         }
5328
5329         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5330         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5331         amdgpu_ring_write(ring,
5332 #ifdef __BIG_ENDIAN
5333                                 (2 << 0) |
5334 #endif
5335                                 lower_32_bits(ib->gpu_addr));
5336         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5337         amdgpu_ring_write(ring, control);
5338 }
5339
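/*
 * Emit an end-of-pipe fence via RELEASE_MEM: flush/invalidate the GL1/GL2
 * and metadata caches, then write the 32- or 64-bit sequence number to
 * @addr and optionally raise an interrupt.
 */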
5340 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5341                                      u64 seq, unsigned flags)
5342 {
5343         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5344         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5345
5346         /* RELEASE_MEM - flush caches, send int */
5347         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5348         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5349                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
5350                                  PACKET3_RELEASE_MEM_GCR_GL2_INV |
5351                                  PACKET3_RELEASE_MEM_GCR_GL2_US |
5352                                  PACKET3_RELEASE_MEM_GCR_GL1_INV |
5353                                  PACKET3_RELEASE_MEM_GCR_GLV_INV |
5354                                  PACKET3_RELEASE_MEM_GCR_GLM_INV |
5355                                  PACKET3_RELEASE_MEM_GCR_GLM_WB |
5356                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5357                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5358                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5359         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5360                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5361
5362         /*
5363          * The address must be Qword aligned for a 64-bit write, and Dword
5364          * aligned when we only send the 32-bit data low (data high is discarded).
5365          */
5366         if (write64bit)
5367                 BUG_ON(addr & 0x7);
5368         else
5369                 BUG_ON(addr & 0x3);
5370         amdgpu_ring_write(ring, lower_32_bits(addr));
5371         amdgpu_ring_write(ring, upper_32_bits(addr));
5372         amdgpu_ring_write(ring, lower_32_bits(seq));
5373         amdgpu_ring_write(ring, upper_32_bits(seq));
5374         amdgpu_ring_write(ring, ring->is_mes_queue ?
5375                          (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5376 }
5377
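/*
 * Stall the pipeline (PFP for gfx rings, ME for compute) until the ring's
 * own fence memory reaches the most recently synced sequence number.
 */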
5378 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5379 {
5380         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5381         uint32_t seq = ring->fence_drv.sync_seq;
5382         uint64_t addr = ring->fence_drv.gpu_addr;
5383
5384         gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5385                                upper_32_bits(addr), seq, 0xffffffff, 4);
5386 }
5387
5388 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5389                                    uint16_t pasid, uint32_t flush_type,
5390                                    bool all_hub, uint8_t dst_sel)
5391 {
5392         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5393         amdgpu_ring_write(ring,
5394                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5395                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5396                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5397                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5398 }
5399
5400 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5401                                          unsigned vmid, uint64_t pd_addr)
5402 {
5403         if (ring->is_mes_queue)
5404                 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5405         else
5406                 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5407
5408         /* compute doesn't have PFP */
5409         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5410                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5411                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5412                 amdgpu_ring_write(ring, 0x0);
5413         }
5414 }
5415
5416 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5417                                           u64 seq, unsigned int flags)
5418 {
5419         struct amdgpu_device *adev = ring->adev;
5420
5421         /* we only allocate 32bit for each seq wb address */
5422         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5423
5424         /* write fence seq to the "addr" */
5425         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5426         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5427                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5428         amdgpu_ring_write(ring, lower_32_bits(addr));
5429         amdgpu_ring_write(ring, upper_32_bits(addr));
5430         amdgpu_ring_write(ring, lower_32_bits(seq));
5431
5432         if (flags & AMDGPU_FENCE_FLAG_INT) {
5433                 /* set register to trigger INT */
5434                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5435                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5436                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5437                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5438                 amdgpu_ring_write(ring, 0);
5439                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5440         }
5441 }
5442
5443 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5444                                          uint32_t flags)
5445 {
5446         uint32_t dw2 = 0;
5447
5448         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5449         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5450                 /* set load_global_config & load_global_uconfig */
5451                 dw2 |= 0x8001;
5452                 /* set load_cs_sh_regs */
5453                 dw2 |= 0x01000000;
5454                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5455                 dw2 |= 0x10002;
5456         }
5457
5458         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5459         amdgpu_ring_write(ring, dw2);
5460         amdgpu_ring_write(ring, 0);
5461 }
5462
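/*
 * Describe the shadow, GDS and CSA buffers used for gfx queue preemption
 * via SET_Q_PREEMPTION_MODE.  This is a no-op when the CP firmware does
 * not expose gfx shadow support.
 */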
5463 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
5464                                            u64 shadow_va, u64 csa_va,
5465                                            u64 gds_va, bool init_shadow,
5466                                            int vmid)
5467 {
5468         struct amdgpu_device *adev = ring->adev;
5469
5470         if (!adev->gfx.cp_gfx_shadow)
5471                 return;
5472
5473         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
5474         amdgpu_ring_write(ring, lower_32_bits(shadow_va));
5475         amdgpu_ring_write(ring, upper_32_bits(shadow_va));
5476         amdgpu_ring_write(ring, lower_32_bits(gds_va));
5477         amdgpu_ring_write(ring, upper_32_bits(gds_va));
5478         amdgpu_ring_write(ring, lower_32_bits(csa_va));
5479         amdgpu_ring_write(ring, upper_32_bits(csa_va));
5480         amdgpu_ring_write(ring, shadow_va ?
5481                           PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
5482         amdgpu_ring_write(ring, init_shadow ?
5483                           PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
5484 }
5485
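/*
 * Open a conditional execution block: emit COND_EXEC against
 * cond_exe_gpu_addr with a placeholder dword count and return the ring
 * offset of that placeholder so patch_cond_exec() can fill in the real
 * count once the protected packets have been emitted.
 */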
5486 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5487 {
5488         unsigned ret;
5489
5490         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5491         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5492         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5493         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5494         ret = ring->wptr & ring->buf_mask;
5495         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5496
5497         return ret;
5498 }
5499
5500 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5501 {
5502         unsigned cur;
5503         BUG_ON(offset > ring->buf_mask);
5504         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5505
5506         cur = (ring->wptr - 1) & ring->buf_mask;
5507         if (likely(cur > offset))
5508                 ring->ring[offset] = cur - offset;
5509         else
5510                 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5511 }
5512
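/*
 * Preempt the IB currently running on @ring: clear the cond_exec
 * condition, ask the KIQ to preempt the queue without unmapping it and to
 * signal the trailing fence, then busy-wait for that fence to appear.
 */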
5513 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5514 {
5515         int i, r = 0;
5516         struct amdgpu_device *adev = ring->adev;
5517         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5518         struct amdgpu_ring *kiq_ring = &kiq->ring;
5519         unsigned long flags;
5520
5521         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5522                 return -EINVAL;
5523
5524         spin_lock_irqsave(&kiq->ring_lock, flags);
5525
5526         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5527                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5528                 return -ENOMEM;
5529         }
5530
5531         /* assert preemption condition */
5532         amdgpu_ring_set_preempt_cond_exec(ring, false);
5533
5534         /* assert IB preemption, emit the trailing fence */
5535         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5536                                    ring->trail_fence_gpu_addr,
5537                                    ++ring->trail_seq);
5538         amdgpu_ring_commit(kiq_ring);
5539
5540         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5541
5542         /* poll the trailing fence */
5543         for (i = 0; i < adev->usec_timeout; i++) {
5544                 if (ring->trail_seq ==
5545                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5546                         break;
5547                 udelay(1);
5548         }
5549
5550         if (i >= adev->usec_timeout) {
5551                 r = -EINVAL;
5552                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5553         }
5554
5555         /* deassert preemption condition */
5556         amdgpu_ring_set_preempt_cond_exec(ring, true);
5557         return r;
5558 }
5559
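/*
 * Write the DE metadata payload (currently only the GDS backup address)
 * into the CSA with WRITE_DATA; MES queues take the addresses from their
 * context buffer instead.  On resume the previously saved payload is
 * replayed from the CPU-side copy rather than rebuilt.
 */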
5560 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5561 {
5562         struct amdgpu_device *adev = ring->adev;
5563         struct v10_de_ib_state de_payload = {0};
5564         uint64_t offset, gds_addr, de_payload_gpu_addr;
5565         void *de_payload_cpu_addr;
5566         int cnt;
5567
5568         if (ring->is_mes_queue) {
5569                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5570                                   gfx[0].gfx_meta_data) +
5571                         offsetof(struct v10_gfx_meta_data, de_payload);
5572                 de_payload_gpu_addr =
5573                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5574                 de_payload_cpu_addr =
5575                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5576
5577                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5578                                   gfx[0].gds_backup) +
5579                         offsetof(struct v10_gfx_meta_data, de_payload);
5580                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5581         } else {
5582                 offset = offsetof(struct v10_gfx_meta_data, de_payload);
5583                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5584                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5585
5586                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5587                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5588                                  PAGE_SIZE);
5589         }
5590
5591         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5592         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5593
5594         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5595         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5596         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5597                                  WRITE_DATA_DST_SEL(8) |
5598                                  WR_CONFIRM) |
5599                                  WRITE_DATA_CACHE_POLICY(0));
5600         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5601         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5602
5603         if (resume)
5604                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5605                                            sizeof(de_payload) >> 2);
5606         else
5607                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5608                                            sizeof(de_payload) >> 2);
5609 }
5610
5611 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5612                                     bool secure)
5613 {
5614         uint32_t v = secure ? FRAME_TMZ : 0;
5615
5616         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5617         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5618 }
5619
5620 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5621                                      uint32_t reg_val_offs)
5622 {
5623         struct amdgpu_device *adev = ring->adev;
5624
5625         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5626         amdgpu_ring_write(ring, 0 |     /* src: register*/
5627                                 (5 << 8) |      /* dst: memory */
5628                                 (1 << 20));     /* write confirm */
5629         amdgpu_ring_write(ring, reg);
5630         amdgpu_ring_write(ring, 0);
5631         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5632                                 reg_val_offs * 4));
5633         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5634                                 reg_val_offs * 4));
5635 }
5636
5637 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5638                                    uint32_t val)
5639 {
5640         uint32_t cmd = 0;
5641
5642         switch (ring->funcs->type) {
5643         case AMDGPU_RING_TYPE_GFX:
5644                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5645                 break;
5646         case AMDGPU_RING_TYPE_KIQ:
5647                 cmd = (1 << 16); /* no inc addr */
5648                 break;
5649         default:
5650                 cmd = WR_CONFIRM;
5651                 break;
5652         }
5653         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5654         amdgpu_ring_write(ring, cmd);
5655         amdgpu_ring_write(ring, reg);
5656         amdgpu_ring_write(ring, 0);
5657         amdgpu_ring_write(ring, val);
5658 }
5659
5660 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5661                                         uint32_t val, uint32_t mask)
5662 {
5663         gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5664 }
5665
5666 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5667                                                    uint32_t reg0, uint32_t reg1,
5668                                                    uint32_t ref, uint32_t mask)
5669 {
5670         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5671
5672         gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5673                                ref, mask, 0x20);
5674 }
5675
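/*
 * Soft recovery: issue an SQ_CMD scoped to @vmid (CHECK_VMID set); CMD
 * 0x03 is understood to kill the hung waves so the ring can make progress.
 */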
5676 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5677                                          unsigned vmid)
5678 {
5679         struct amdgpu_device *adev = ring->adev;
5680         uint32_t value = 0;
5681
5682         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5683         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5684         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5685         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5686         WREG32_SOC15(GC, 0, regSQ_CMD, value);
5687 }
5688
5689 static void
5690 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5691                                       uint32_t me, uint32_t pipe,
5692                                       enum amdgpu_interrupt_state state)
5693 {
5694         uint32_t cp_int_cntl, cp_int_cntl_reg;
5695
5696         if (!me) {
5697                 switch (pipe) {
5698                 case 0:
5699                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5700                         break;
5701                 case 1:
5702                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5703                         break;
5704                 default:
5705                         DRM_DEBUG("invalid pipe %d\n", pipe);
5706                         return;
5707                 }
5708         } else {
5709                 DRM_DEBUG("invalid me %d\n", me);
5710                 return;
5711         }
5712
5713         switch (state) {
5714         case AMDGPU_IRQ_STATE_DISABLE:
5715                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5716                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5717                                             TIME_STAMP_INT_ENABLE, 0);
5718                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5719                                             GENERIC0_INT_ENABLE, 0);
5720                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5721                 break;
5722         case AMDGPU_IRQ_STATE_ENABLE:
5723                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5724                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5725                                             TIME_STAMP_INT_ENABLE, 1);
5726                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5727                                             GENERIC0_INT_ENABLE, 1);
5728                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5729                 break;
5730         default:
5731                 break;
5732         }
5733 }
5734
5735 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5736                                                      int me, int pipe,
5737                                                      enum amdgpu_interrupt_state state)
5738 {
5739         u32 mec_int_cntl, mec_int_cntl_reg;
5740
5741         /*
5742          * amdgpu controls only the first MEC. That's why this function only
5743          * handles the setting of interrupts for this specific MEC. All other
5744          * pipes' interrupts are set by amdkfd.
5745          */
5746
5747         if (me == 1) {
5748                 switch (pipe) {
5749                 case 0:
5750                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5751                         break;
5752                 case 1:
5753                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5754                         break;
5755                 case 2:
5756                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5757                         break;
5758                 case 3:
5759                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5760                         break;
5761                 default:
5762                         DRM_DEBUG("invalid pipe %d\n", pipe);
5763                         return;
5764                 }
5765         } else {
5766                 DRM_DEBUG("invalid me %d\n", me);
5767                 return;
5768         }
5769
5770         switch (state) {
5771         case AMDGPU_IRQ_STATE_DISABLE:
5772                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5773                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5774                                              TIME_STAMP_INT_ENABLE, 0);
5775                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5776                                              GENERIC0_INT_ENABLE, 0);
5777                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5778                 break;
5779         case AMDGPU_IRQ_STATE_ENABLE:
5780                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5781                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5782                                              TIME_STAMP_INT_ENABLE, 1);
5783                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5784                                              GENERIC0_INT_ENABLE, 1);
5785                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5786                 break;
5787         default:
5788                 break;
5789         }
5790 }
5791
5792 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5793                                             struct amdgpu_irq_src *src,
5794                                             unsigned type,
5795                                             enum amdgpu_interrupt_state state)
5796 {
5797         switch (type) {
5798         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5799                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5800                 break;
5801         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5802                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5803                 break;
5804         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5805                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5806                 break;
5807         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5808                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5809                 break;
5810         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5811                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5812                 break;
5813         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5814                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5815                 break;
5816         default:
5817                 break;
5818         }
5819         return 0;
5820 }
5821
5822 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5823                              struct amdgpu_irq_src *source,
5824                              struct amdgpu_iv_entry *entry)
5825 {
5826         int i;
5827         u8 me_id, pipe_id, queue_id;
5828         struct amdgpu_ring *ring;
5829         uint32_t mes_queue_id = entry->src_data[0];
5830
5831         DRM_DEBUG("IH: CP EOP\n");
5832
5833         if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5834                 struct amdgpu_mes_queue *queue;
5835
5836                 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5837
5838                 spin_lock(&adev->mes.queue_id_lock);
5839                 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5840                 if (queue) {
5841                         DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5842                         amdgpu_fence_process(queue->ring);
5843                 }
5844                 spin_unlock(&adev->mes.queue_id_lock);
5845         } else {
5846                 me_id = (entry->ring_id & 0x0c) >> 2;
5847                 pipe_id = (entry->ring_id & 0x03) >> 0;
5848                 queue_id = (entry->ring_id & 0x70) >> 4;
5849
5850                 switch (me_id) {
5851                 case 0:
5852                         if (pipe_id == 0)
5853                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5854                         else
5855                                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
5856                         break;
5857                 case 1:
5858                 case 2:
5859                         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5860                                 ring = &adev->gfx.compute_ring[i];
5861                                 /* Per-queue interrupt is supported for MEC starting from VI.
5862                                  * The interrupt can only be enabled/disabled per pipe instead
5863                                  * of per queue.
5864                                  */
5865                                 if ((ring->me == me_id) &&
5866                                     (ring->pipe == pipe_id) &&
5867                                     (ring->queue == queue_id))
5868                                         amdgpu_fence_process(ring);
5869                         }
5870                         break;
5871                 }
5872         }
5873
5874         return 0;
5875 }
5876
5877 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5878                                               struct amdgpu_irq_src *source,
5879                                               unsigned type,
5880                                               enum amdgpu_interrupt_state state)
5881 {
5882         switch (state) {
5883         case AMDGPU_IRQ_STATE_DISABLE:
5884         case AMDGPU_IRQ_STATE_ENABLE:
5885                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5886                                PRIV_REG_INT_ENABLE,
5887                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5888                 break;
5889         default:
5890                 break;
5891         }
5892
5893         return 0;
5894 }
5895
5896 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5897                                                struct amdgpu_irq_src *source,
5898                                                unsigned type,
5899                                                enum amdgpu_interrupt_state state)
5900 {
5901         switch (state) {
5902         case AMDGPU_IRQ_STATE_DISABLE:
5903         case AMDGPU_IRQ_STATE_ENABLE:
5904                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5905                                PRIV_INSTR_INT_ENABLE,
5906                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5907                 break;
5908         default:
5909                 break;
5910         }
5911
5912         return 0;
5913 }
5914
5915 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
5916                                         struct amdgpu_iv_entry *entry)
5917 {
5918         u8 me_id, pipe_id, queue_id;
5919         struct amdgpu_ring *ring;
5920         int i;
5921
5922         me_id = (entry->ring_id & 0x0c) >> 2;
5923         pipe_id = (entry->ring_id & 0x03) >> 0;
5924         queue_id = (entry->ring_id & 0x70) >> 4;
5925
5926         switch (me_id) {
5927         case 0:
5928                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5929                         ring = &adev->gfx.gfx_ring[i];
5930                         /* we only enable 1 gfx queue per pipe for now */
5931                         if (ring->me == me_id && ring->pipe == pipe_id)
5932                                 drm_sched_fault(&ring->sched);
5933                 }
5934                 break;
5935         case 1:
5936         case 2:
5937                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5938                         ring = &adev->gfx.compute_ring[i];
5939                         if (ring->me == me_id && ring->pipe == pipe_id &&
5940                             ring->queue == queue_id)
5941                                 drm_sched_fault(&ring->sched);
5942                 }
5943                 break;
5944         default:
5945                 BUG();
5946                 break;
5947         }
5948 }
5949
5950 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
5951                                   struct amdgpu_irq_src *source,
5952                                   struct amdgpu_iv_entry *entry)
5953 {
5954         DRM_ERROR("Illegal register access in command stream\n");
5955         gfx_v11_0_handle_priv_fault(adev, entry);
5956         return 0;
5957 }
5958
5959 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
5960                                    struct amdgpu_irq_src *source,
5961                                    struct amdgpu_iv_entry *entry)
5962 {
5963         DRM_ERROR("Illegal instruction in command stream\n");
5964         gfx_v11_0_handle_priv_fault(adev, entry);
5965         return 0;
5966 }
5967
5968 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
5969                                   struct amdgpu_irq_src *source,
5970                                   struct amdgpu_iv_entry *entry)
5971 {
5972         if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
5973                 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
5974
5975         return 0;
5976 }
5977
5978 #if 0
5979 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
5980                                              struct amdgpu_irq_src *src,
5981                                              unsigned int type,
5982                                              enum amdgpu_interrupt_state state)
5983 {
5984         uint32_t tmp, target;
5985         struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
5986
5987         target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5988         target += ring->pipe;
5989
5990         switch (type) {
5991         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
5992                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
5993                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
5994                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
5995                                             GENERIC2_INT_ENABLE, 0);
5996                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
5997
5998                         tmp = RREG32_SOC15_IP(GC, target);
5999                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6000                                             GENERIC2_INT_ENABLE, 0);
6001                         WREG32_SOC15_IP(GC, target, tmp);
6002                 } else {
6003                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6004                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6005                                             GENERIC2_INT_ENABLE, 1);
6006                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6007
6008                         tmp = RREG32_SOC15_IP(GC, target);
6009                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6010                                             GENERIC2_INT_ENABLE, 1);
6011                         WREG32_SOC15_IP(GC, target, tmp);
6012                 }
6013                 break;
6014         default:
6015                 BUG(); /* kiq only support GENERIC2_INT now */
6016                 break;
6017         }
6018         return 0;
6019 }
6020 #endif
6021
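/*
 * Emit an ACQUIRE_MEM over the full address range that invalidates (and,
 * where applicable, writes back) the GL2, GL1, vector/scalar L0,
 * instruction and metadata caches through the GCR.
 */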
6022 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6023 {
6024         const unsigned int gcr_cntl =
6025                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6026                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6027                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6028                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6029                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6030                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6031                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6032                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6033
6034         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6035         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6036         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6037         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6038         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6039         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6040         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6041         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6042         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6043 }
6044
6045 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6046         .name = "gfx_v11_0",
6047         .early_init = gfx_v11_0_early_init,
6048         .late_init = gfx_v11_0_late_init,
6049         .sw_init = gfx_v11_0_sw_init,
6050         .sw_fini = gfx_v11_0_sw_fini,
6051         .hw_init = gfx_v11_0_hw_init,
6052         .hw_fini = gfx_v11_0_hw_fini,
6053         .suspend = gfx_v11_0_suspend,
6054         .resume = gfx_v11_0_resume,
6055         .is_idle = gfx_v11_0_is_idle,
6056         .wait_for_idle = gfx_v11_0_wait_for_idle,
6057         .soft_reset = gfx_v11_0_soft_reset,
6058         .check_soft_reset = gfx_v11_0_check_soft_reset,
6059         .post_soft_reset = gfx_v11_0_post_soft_reset,
6060         .set_clockgating_state = gfx_v11_0_set_clockgating_state,
6061         .set_powergating_state = gfx_v11_0_set_powergating_state,
6062         .get_clockgating_state = gfx_v11_0_get_clockgating_state,
6063 };
6064
6065 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6066         .type = AMDGPU_RING_TYPE_GFX,
6067         .align_mask = 0xff,
6068         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6069         .support_64bit_ptrs = true,
6070         .secure_submission_supported = true,
6071         .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6072         .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6073         .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6074         .emit_frame_size = /* 242 dwords maximum in total if 16 IBs are emitted */
6075                 5 + /* COND_EXEC */
6076                 9 + /* SET_Q_PREEMPTION_MODE */
6077                 7 + /* PIPELINE_SYNC */
6078                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6079                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6080                 2 + /* VM_FLUSH */
6081                 8 + /* FENCE for VM_FLUSH */
6082                 20 + /* GDS switch */
6083                 5 + /* COND_EXEC */
6084                 7 + /* HDP_flush */
6085                 4 + /* VGT_flush */
6086                 31 + /* DE_META */
6087                 3 + /* CNTX_CTRL */
6088                 5 + /* HDP_INVL */
6089                 8 + 8 + /* FENCE x2 */
6090                 8, /* gfx_v11_0_emit_mem_sync */
6091         .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
6092         .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6093         .emit_fence = gfx_v11_0_ring_emit_fence,
6094         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6095         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6096         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6097         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6098         .test_ring = gfx_v11_0_ring_test_ring,
6099         .test_ib = gfx_v11_0_ring_test_ib,
6100         .insert_nop = amdgpu_ring_insert_nop,
6101         .pad_ib = amdgpu_ring_generic_pad_ib,
6102         .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6103         .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
6104         .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6105         .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6106         .preempt_ib = gfx_v11_0_ring_preempt_ib,
6107         .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6108         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6109         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6110         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6111         .soft_recovery = gfx_v11_0_ring_soft_recovery,
6112         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6113 };
6114
6115 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6116         .type = AMDGPU_RING_TYPE_COMPUTE,
6117         .align_mask = 0xff,
6118         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6119         .support_64bit_ptrs = true,
6120         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6121         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6122         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6123         .emit_frame_size =
6124                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6125                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6126                 5 + /* hdp invalidate */
6127                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6128                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6129                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6130                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6131                 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6132                 8, /* gfx_v11_0_emit_mem_sync */
6133         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6134         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6135         .emit_fence = gfx_v11_0_ring_emit_fence,
6136         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6137         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6138         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6139         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6140         .test_ring = gfx_v11_0_ring_test_ring,
6141         .test_ib = gfx_v11_0_ring_test_ib,
6142         .insert_nop = amdgpu_ring_insert_nop,
6143         .pad_ib = amdgpu_ring_generic_pad_ib,
6144         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6145         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6146         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6147         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6148 };
6149
6150 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6151         .type = AMDGPU_RING_TYPE_KIQ,
6152         .align_mask = 0xff,
6153         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6154         .support_64bit_ptrs = true,
6155         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6156         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6157         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6158         .emit_frame_size =
6159                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6160                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6161                 5 + /* hdp invalidate */
6162                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6163                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6164                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6165                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6166                 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6167         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6168         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6169         .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6170         .test_ring = gfx_v11_0_ring_test_ring,
6171         .test_ib = gfx_v11_0_ring_test_ib,
6172         .insert_nop = amdgpu_ring_insert_nop,
6173         .pad_ib = amdgpu_ring_generic_pad_ib,
6174         .emit_rreg = gfx_v11_0_ring_emit_rreg,
6175         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6176         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6177         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6178 };
6179
6180 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6181 {
6182         int i;
6183
6184         adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6185
6186         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6187                 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6188
6189         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6190                 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6191 }
6192
6193 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6194         .set = gfx_v11_0_set_eop_interrupt_state,
6195         .process = gfx_v11_0_eop_irq,
6196 };
6197
6198 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6199         .set = gfx_v11_0_set_priv_reg_fault_state,
6200         .process = gfx_v11_0_priv_reg_irq,
6201 };
6202
6203 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6204         .set = gfx_v11_0_set_priv_inst_fault_state,
6205         .process = gfx_v11_0_priv_inst_irq,
6206 };
6207
6208 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
6209         .process = gfx_v11_0_rlc_gc_fed_irq,
6210 };
6211
6212 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6213 {
6214         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6215         adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6216
6217         adev->gfx.priv_reg_irq.num_types = 1;
6218         adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6219
6220         adev->gfx.priv_inst_irq.num_types = 1;
6221         adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6222
6223         adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
6224         adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
6225
6226 }
6227
6228 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6229 {
6230         if (adev->flags & AMD_IS_APU)
6231                 adev->gfx.imu.mode = MISSION_MODE;
6232         else
6233                 adev->gfx.imu.mode = DEBUG_MODE;
6234
6235         adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6236 }
6237
6238 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6239 {
6240         adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6241 }
6242
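/*
 * Default GDS/GWS/OA sizes for gfx11.  gds_compute_max_wave_id covers
 * total_cu * 32 wave IDs, i.e. 32 wave slots per CU across the chip.
 */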
6243 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6244 {
6245         unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6246                             adev->gfx.config.max_sh_per_se *
6247                             adev->gfx.config.max_shader_engines;
6248
6249         adev->gds.gds_size = 0x1000;
6250         adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6251         adev->gds.gws_size = 64;
6252         adev->gds.oa_size = 16;
6253 }
6254
6255 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6256 {
6257         /* set gfx eng mqd */
6258         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6259                 sizeof(struct v11_gfx_mqd);
6260         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6261                 gfx_v11_0_gfx_mqd_init;
6262         /* set compute eng mqd */
6263         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6264                 sizeof(struct v11_compute_mqd);
6265         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6266                 gfx_v11_0_compute_mqd_init;
6267 }
6268
6269 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6270                                                           u32 bitmap)
6271 {
6272         u32 data;
6273
6274         if (!bitmap)
6275                 return;
6276
6277         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6278         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6279
6280         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6281 }
6282
6283 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6284 {
6285         u32 data, wgp_bitmask;
6286         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6287         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6288
6289         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6290         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6291
6292         wgp_bitmask =
6293                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6294
6295         return (~data) & wgp_bitmask;
6296 }
6297
static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if a WGP is enabled, both of its CUs are enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

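/*
 * Walk every SE/SH, apply the per-SH disable masks parsed by
 * amdgpu_gfx_parse_disable_cu() and fill cu_info with the active CU
 * bitmaps and the total number of active CUs.
 */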
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			counter = 0;
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/**
			 * GFX11 can support more than 4 SEs, while the bitmap
			 * in the cu_info struct is 4x4 and the ioctl interface
			 * struct drm_amdgpu_info_device must stay stable.
			 * So the last two columns of the bitmap store the CU
			 * mask for SEs 4 to 7; the layout of the bitmap is:
			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v11_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
};