linux.git: drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
1 /*
2  * Copyright 2023 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v12_0.h"
34 #include "soc24.h"
35 #include "nvd.h"
36
37 #include "gc/gc_12_0_0_offset.h"
38 #include "gc/gc_12_0_0_sh_mask.h"
39 #include "soc24_enum.h"
40 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
41
42 #include "soc15.h"
43 #include "soc15d.h"
44 #include "clearstate_gfx12.h"
45 #include "v12_structs.h"
46 #include "gfx_v12_0.h"
47 #include "nbif_v6_3_1.h"
48 #include "mes_v12_0.h"
49
50 #define GFX12_NUM_GFX_RINGS     1
51 #define GFX12_MEC_HPD_SIZE      2048
52
53 #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
54
55 MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
56 MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
57 MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
58 MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin");
59 MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin");
60 MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin");
61 MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin");
62 MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin");
63 MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin");
64 MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin");
65
66 static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
67         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
68         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
69         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
70         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
71         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
72         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
73         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
74         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
75         SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
76         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
77         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
78         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
79         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
80         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
81         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
82         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
83         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
84         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
85         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
86         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
87         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
88         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
89         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
90         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
91         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
92         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
93         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
94         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
95         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
96         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
97         SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
98         SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
99         SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
100         SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
101         SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
102         SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
103         SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
104         SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
105         SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
106         SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
107         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
108         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
109         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
110         SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
111         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
112         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
113         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
114         SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
115         SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
116         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
117         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
118         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
119         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
120
121         /* cp header registers */
122         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
123         SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
124         SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
125         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
126         /* SE status registers */
127         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
128         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
129         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
130         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
131 };
132
133 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
134         /* compute registers */
135         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
136         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
137         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
138         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
139         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
140         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
141         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
142         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
143         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
144         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
145         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
146         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
147         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
148         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
149         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
150         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
151         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
152         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
153         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
154         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
155         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
156         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
157         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
158         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
159         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
160         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
161         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
162         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
163         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
164         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
165         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
166         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
167         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
168         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
169         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
170         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
171         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
172         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
173         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
174 };
175
176 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
177         /* gfx queue registers */
178         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
179         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
180         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
181         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
182         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
183         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
184         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
185         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
186         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
187         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
188         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
189         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
190         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
191         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
192         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
193         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
194         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
195         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
196         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
197         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
198         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
199         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
200         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
201         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
202         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
203 };
204
205 static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
206         SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
207         SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
208         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
209 };
210
211 static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
212         SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000),
213 };
214
215 #define DEFAULT_SH_MEM_CONFIG \
216         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
217          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
218          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
219
220 static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
221 static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
222 static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
223 static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
224 static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
225 static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
226 static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
227                                  struct amdgpu_cu_info *cu_info);
228 static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
229 static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
230                                    u32 sh_num, u32 instance, int xcc_id);
231 static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
232
233 static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
234 static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
235                                      uint32_t val);
236 static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
237 static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
238                                            uint16_t pasid, uint32_t flush_type,
239                                            bool all_hub, uint8_t dst_sel);
240 static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
241 static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
242 static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
243                                       bool enable);
244
245 static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
246                                         uint64_t queue_mask)
247 {
248         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
249         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
250                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
251         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
252         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
253         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
254         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
255         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
256         amdgpu_ring_write(kiq_ring, 0);
257 }
258
259 static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
260                                      struct amdgpu_ring *ring)
261 {
262         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
263         uint64_t wptr_addr = ring->wptr_gpu_addr;
264         uint32_t me = 0, eng_sel = 0;
265
266         switch (ring->funcs->type) {
267         case AMDGPU_RING_TYPE_COMPUTE:
268                 me = 1;
269                 eng_sel = 0;
270                 break;
271         case AMDGPU_RING_TYPE_GFX:
272                 me = 0;
273                 eng_sel = 4;
274                 break;
275         case AMDGPU_RING_TYPE_MES:
276                 me = 2;
277                 eng_sel = 5;
278                 break;
279         default:
280                 WARN_ON(1);
281         }
282
283         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
284         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
285         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
286                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
287                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
288                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
289                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
290                           PACKET3_MAP_QUEUES_ME((me)) |
291                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
292                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
293                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
294                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
295         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
296         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
297         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
298         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
299         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
300 }
301
302 static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
303                                        struct amdgpu_ring *ring,
304                                        enum amdgpu_unmap_queues_action action,
305                                        u64 gpu_addr, u64 seq)
306 {
307         struct amdgpu_device *adev = kiq_ring->adev;
308         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
309
310         if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
311                 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
312                 return;
313         }
314
315         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
316         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
317                           PACKET3_UNMAP_QUEUES_ACTION(action) |
318                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
319                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
320                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
321         amdgpu_ring_write(kiq_ring,
322                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
323
324         if (action == PREEMPT_QUEUES_NO_UNMAP) {
325                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
326                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
327                 amdgpu_ring_write(kiq_ring, seq);
328         } else {
329                 amdgpu_ring_write(kiq_ring, 0);
330                 amdgpu_ring_write(kiq_ring, 0);
331                 amdgpu_ring_write(kiq_ring, 0);
332         }
333 }
334
335 static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
336                                        struct amdgpu_ring *ring,
337                                        u64 addr, u64 seq)
338 {
339         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
340
341         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
342         amdgpu_ring_write(kiq_ring,
343                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
344                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
345                           PACKET3_QUERY_STATUS_COMMAND(2));
346         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
347                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
348                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
349         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
350         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
351         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
352         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
353 }
354
355 static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
356                                           uint16_t pasid,
357                                           uint32_t flush_type,
358                                           bool all_hub)
359 {
360         gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
361 }
362
363 static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
364         .kiq_set_resources = gfx_v12_0_kiq_set_resources,
365         .kiq_map_queues = gfx_v12_0_kiq_map_queues,
366         .kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
367         .kiq_query_status = gfx_v12_0_kiq_query_status,
368         .kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
369         .set_resources_size = 8,
370         .map_queues_size = 7,
371         .unmap_queues_size = 6,
372         .query_status_size = 7,
373         .invalidate_tlbs_size = 2,
374 };
375
376 static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
377 {
378         adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs;
379 }
380
381 static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
382                                    int mem_space, int opt, uint32_t addr0,
383                                    uint32_t addr1, uint32_t ref,
384                                    uint32_t mask, uint32_t inv)
385 {
386         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
387         amdgpu_ring_write(ring,
388                           /* memory (1) or register (0) */
389                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
390                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
391                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
392                            WAIT_REG_MEM_ENGINE(eng_sel)));
393
394         if (mem_space)
395                 BUG_ON(addr0 & 0x3); /* Dword align */
396         amdgpu_ring_write(ring, addr0);
397         amdgpu_ring_write(ring, addr1);
398         amdgpu_ring_write(ring, ref);
399         amdgpu_ring_write(ring, mask);
400         amdgpu_ring_write(ring, inv); /* poll interval */
401 }
402
403 static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
404 {
405         struct amdgpu_device *adev = ring->adev;
406         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
407         uint32_t tmp = 0;
408         unsigned i;
409         int r;
410
411         WREG32(scratch, 0xCAFEDEAD);
412         r = amdgpu_ring_alloc(ring, 5);
413         if (r) {
414                 dev_err(adev->dev,
415                         "amdgpu: cp failed to lock ring %d (%d).\n",
416                         ring->idx, r);
417                 return r;
418         }
419
420         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
421                 gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
422         } else {
423                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
424                 amdgpu_ring_write(ring, scratch -
425                                   PACKET3_SET_UCONFIG_REG_START);
426                 amdgpu_ring_write(ring, 0xDEADBEEF);
427         }
428         amdgpu_ring_commit(ring);
429
430         for (i = 0; i < adev->usec_timeout; i++) {
431                 tmp = RREG32(scratch);
432                 if (tmp == 0xDEADBEEF)
433                         break;
434                 if (amdgpu_emu_mode == 1)
435                         msleep(1);
436                 else
437                         udelay(1);
438         }
439
440         if (i >= adev->usec_timeout)
441                 r = -ETIMEDOUT;
442         return r;
443 }
444
445 static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
446 {
447         struct amdgpu_device *adev = ring->adev;
448         struct amdgpu_ib ib;
449         struct dma_fence *f = NULL;
450         unsigned index;
451         uint64_t gpu_addr;
452         volatile uint32_t *cpu_ptr;
453         long r;
454
455         /* MES KIQ fw doesn't support indirect buffers for now */
456         if (adev->enable_mes_kiq &&
457             ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
458                 return 0;
459
460         memset(&ib, 0, sizeof(ib));
461
462         if (ring->is_mes_queue) {
463                 uint32_t padding, offset;
464
465                 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
466                 padding = amdgpu_mes_ctx_get_offs(ring,
467                                                   AMDGPU_MES_CTX_PADDING_OFFS);
468
469                 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
470                 ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
471
472                 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
473                 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
474                 *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
475         } else {
476                 r = amdgpu_device_wb_get(adev, &index);
477                 if (r)
478                         return r;
479
480                 gpu_addr = adev->wb.gpu_addr + (index * 4);
481                 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
482                 cpu_ptr = &adev->wb.wb[index];
483
484                 r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
485                 if (r) {
486                         dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
487                         goto err1;
488                 }
489         }
490
491         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
492         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
493         ib.ptr[2] = lower_32_bits(gpu_addr);
494         ib.ptr[3] = upper_32_bits(gpu_addr);
495         ib.ptr[4] = 0xDEADBEEF;
496         ib.length_dw = 5;
497
498         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
499         if (r)
500                 goto err2;
501
502         r = dma_fence_wait_timeout(f, false, timeout);
503         if (r == 0) {
504                 r = -ETIMEDOUT;
505                 goto err2;
506         } else if (r < 0) {
507                 goto err2;
508         }
509
510         if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
511                 r = 0;
512         else
513                 r = -EINVAL;
514 err2:
515         if (!ring->is_mes_queue)
516                 amdgpu_ib_free(adev, &ib, NULL);
517         dma_fence_put(f);
518 err1:
519         if (!ring->is_mes_queue)
520                 amdgpu_device_wb_free(adev, index);
521         return r;
522 }
523
524 static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
525 {
526         amdgpu_ucode_release(&adev->gfx.pfp_fw);
527         amdgpu_ucode_release(&adev->gfx.me_fw);
528         amdgpu_ucode_release(&adev->gfx.rlc_fw);
529         amdgpu_ucode_release(&adev->gfx.mec_fw);
530
531         kfree(adev->gfx.rlc.register_list_format);
532 }
533
534 static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
535 {
536         const struct psp_firmware_header_v1_0 *toc_hdr;
537         int err = 0;
538
539         err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
540                                    "amdgpu/%s_toc.bin", ucode_prefix);
541         if (err)
542                 goto out;
543
544         toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
545         adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
546         adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
547         adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
548         adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
549                         le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
550         return 0;
551 out:
552         amdgpu_ucode_release(&adev->psp.toc_fw);
553         return err;
554 }
555
556 static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
557 {
558         char ucode_prefix[15];
559         int err;
560         const struct rlc_firmware_header_v2_0 *rlc_hdr;
561         uint16_t version_major;
562         uint16_t version_minor;
563
564         DRM_DEBUG("\n");
565
566         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
567
568         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
569                                    "amdgpu/%s_pfp.bin", ucode_prefix);
570         if (err)
571                 goto out;
572         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
573         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
574
575         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
576                                    "amdgpu/%s_me.bin", ucode_prefix);
577         if (err)
578                 goto out;
579         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
580         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
581
582         if (!amdgpu_sriov_vf(adev)) {
583                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
584                                            "amdgpu/%s_rlc.bin", ucode_prefix);
585                 if (err)
586                         goto out;
587                 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
588                 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
589                 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
590                 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
591                 if (err)
592                         goto out;
593         }
594
595         err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
596                                    "amdgpu/%s_mec.bin", ucode_prefix);
597         if (err)
598                 goto out;
599         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
600         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
601         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
602
603         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
604                 err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
605
606         /* only one MEC for gfx 12 */
607         adev->gfx.mec2_fw = NULL;
608
609         if (adev->gfx.imu.funcs) {
610                 if (adev->gfx.imu.funcs->init_microcode) {
611                         err = adev->gfx.imu.funcs->init_microcode(adev);
612                         if (err)
613                                 dev_err(adev->dev, "Failed to load imu firmware!\n");
614                 }
615         }
616
617 out:
618         if (err) {
619                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
620                 amdgpu_ucode_release(&adev->gfx.me_fw);
621                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
622                 amdgpu_ucode_release(&adev->gfx.mec_fw);
623         }
624
625         return err;
626 }
627
628 static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
629 {
630         u32 count = 0;
631         const struct cs_section_def *sect = NULL;
632         const struct cs_extent_def *ext = NULL;
633
634         count += 1;
635
636         for (sect = gfx12_cs_data; sect->section != NULL; ++sect) {
637                 if (sect->id == SECT_CONTEXT) {
638                         for (ext = sect->section; ext->extent != NULL; ++ext)
639                                 count += 2 + ext->reg_count;
640                 } else
641                         return 0;
642         }
643
644         return count;
645 }
646
647 static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev,
648                                      volatile u32 *buffer)
649 {
650         u32 count = 0, clustercount = 0, i;
651         const struct cs_section_def *sect = NULL;
652         const struct cs_extent_def *ext = NULL;
653
654         if (adev->gfx.rlc.cs_data == NULL)
655                 return;
656         if (buffer == NULL)
657                 return;
658
659         count += 1;
660
661         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
662                 if (sect->id == SECT_CONTEXT) {
663                         for (ext = sect->section; ext->extent != NULL; ++ext) {
664                                 clustercount++;
665                                 buffer[count++] = ext->reg_count;
666                                 buffer[count++] = ext->reg_index;
667
668                                 for (i = 0; i < ext->reg_count; i++)
669                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
670                         }
671                 } else
672                         return;
673         }
674
675         buffer[0] = clustercount;
676 }
677
678 static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
679 {
680         /* clear state block */
681         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
682                         &adev->gfx.rlc.clear_state_gpu_addr,
683                         (void **)&adev->gfx.rlc.cs_ptr);
684
685         /* jump table block */
686         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
687                         &adev->gfx.rlc.cp_table_gpu_addr,
688                         (void **)&adev->gfx.rlc.cp_table_ptr);
689 }
690
691 static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
692 {
693         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
694
695         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
696         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
697         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
698         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
699         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
700         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
701         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
702         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
703         adev->gfx.rlc.rlcg_reg_access_supported = true;
704 }
705
706 static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
707 {
708         const struct cs_section_def *cs_data;
709         int r;
710
711         adev->gfx.rlc.cs_data = gfx12_cs_data;
712
713         cs_data = adev->gfx.rlc.cs_data;
714
715         if (cs_data) {
716                 /* init clear state block */
717                 r = amdgpu_gfx_rlc_init_csb(adev);
718                 if (r)
719                         return r;
720         }
721
722         /* init spm vmid with 0xf */
723         if (adev->gfx.rlc.funcs->update_spm_vmid)
724                 adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
725
726         return 0;
727 }
728
729 static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
730 {
731         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
732         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
733         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
734 }
735
736 static void gfx_v12_0_me_init(struct amdgpu_device *adev)
737 {
738         bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
739
740         amdgpu_gfx_graphics_queue_acquire(adev);
741 }
742
743 static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
744 {
745         int r;
746         u32 *hpd;
747         size_t mec_hpd_size;
748
749         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
750
751         /* take ownership of the relevant compute queues */
752         amdgpu_gfx_compute_queue_acquire(adev);
753         mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
754
755         if (mec_hpd_size) {
756                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
757                                               AMDGPU_GEM_DOMAIN_GTT,
758                                               &adev->gfx.mec.hpd_eop_obj,
759                                               &adev->gfx.mec.hpd_eop_gpu_addr,
760                                               (void **)&hpd);
761                 if (r) {
762                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
763                         gfx_v12_0_mec_fini(adev);
764                         return r;
765                 }
766
767                 memset(hpd, 0, mec_hpd_size);
768
769                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
770                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
771         }
772
773         return 0;
774 }
775
776 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
777 {
778         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
779                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
780                 (address << SQ_IND_INDEX__INDEX__SHIFT));
781         return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
782 }
783
784 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
785                            uint32_t thread, uint32_t regno,
786                            uint32_t num, uint32_t *out)
787 {
788         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
789                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
790                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
791                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
792                 (SQ_IND_INDEX__AUTO_INCR_MASK));
793         while (num--)
794                 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
795 }
796
797 static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
798                                      uint32_t xcc_id,
799                                      uint32_t simd, uint32_t wave,
800                                      uint32_t *dst, int *no_fields)
801 {
802         /* in gfx12 the SIMD_ID is specified as part of the INSTANCE
803          * field when performing a select_se_sh so it should be
804          * zero here */
805         WARN_ON(simd != 0);
806
807         /* type 4 wave data */
808         dst[(*no_fields)++] = 4;
809         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
810         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
811         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
812         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
813         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
814         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
815         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
816         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
817         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
818         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
819         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
820         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
821         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
822         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
823         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV);
824         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
825         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER);
826         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL);
827         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE);
828         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE);
829         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
830         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
831         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE);
832 }
833
834 static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
835                                       uint32_t xcc_id, uint32_t simd,
836                                       uint32_t wave, uint32_t start,
837                                       uint32_t size, uint32_t *dst)
838 {
839         WARN_ON(simd != 0);
840
841         wave_read_regs(
842                 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
843                 dst);
844 }
845
846 static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
847                                       uint32_t xcc_id, uint32_t simd,
848                                       uint32_t wave, uint32_t thread,
849                                       uint32_t start, uint32_t size,
850                                       uint32_t *dst)
851 {
852         wave_read_regs(
853                 adev, wave, thread,
854                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
855 }
856
857 static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
858                                        u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
859 {
860         soc24_grbm_select(adev, me, pipe, q, vm);
861 }
862
863 static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
864         .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
865         .select_se_sh = &gfx_v12_0_select_se_sh,
866         .read_wave_data = &gfx_v12_0_read_wave_data,
867         .read_wave_sgprs = &gfx_v12_0_read_wave_sgprs,
868         .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
869         .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
870         .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
871 };
872
873 static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
874 {
875
876         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
877         case IP_VERSION(12, 0, 0):
878         case IP_VERSION(12, 0, 1):
879                 adev->gfx.config.max_hw_contexts = 8;
880                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
881                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
882                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
883                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
884                 break;
885         default:
886                 BUG();
887                 break;
888         }
889
890         return 0;
891 }
892
893 static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
894                                    int me, int pipe, int queue)
895 {
896         int r;
897         struct amdgpu_ring *ring;
898         unsigned int irq_type;
899
900         ring = &adev->gfx.gfx_ring[ring_id];
901
902         ring->me = me;
903         ring->pipe = pipe;
904         ring->queue = queue;
905
906         ring->ring_obj = NULL;
907         ring->use_doorbell = true;
908
909         if (!ring_id)
910                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
911         else
912                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
913         ring->vm_hub = AMDGPU_GFXHUB(0);
914         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
915
916         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
917         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
918                              AMDGPU_RING_PRIO_DEFAULT, NULL);
919         if (r)
920                 return r;
921         return 0;
922 }
923
924 static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
925                                        int mec, int pipe, int queue)
926 {
927         int r;
928         unsigned irq_type;
929         struct amdgpu_ring *ring;
930         unsigned int hw_prio;
931
932         ring = &adev->gfx.compute_ring[ring_id];
933
934         /* mec0 is me1 */
935         ring->me = mec + 1;
936         ring->pipe = pipe;
937         ring->queue = queue;
938
939         ring->ring_obj = NULL;
940         ring->use_doorbell = true;
941         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
942         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
943                                 + (ring_id * GFX12_MEC_HPD_SIZE);
944         ring->vm_hub = AMDGPU_GFXHUB(0);
945         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
946
947         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
948                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
949                 + ring->pipe;
950         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
951                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
952         /* type-2 packets are deprecated on MEC, use type-3 instead */
953         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
954                              hw_prio, NULL);
955         if (r)
956                 return r;
957
958         return 0;
959 }
960
961 static struct {
962         SOC24_FIRMWARE_ID       id;
963         unsigned int            offset;
964         unsigned int            size;
965         unsigned int            size_x16;
966 } rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
967
968 #define RLC_TOC_OFFSET_DWUNIT   8
969 #define RLC_SIZE_MULTIPLE       1024
970 #define RLC_TOC_UMF_SIZE_inM    23ULL
971 #define RLC_TOC_FORMAT_API      165ULL
972
973 static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
974 {
975         RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
976
977         while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
978                 rlc_autoload_info[ucode->id].id = ucode->id;
979                 rlc_autoload_info[ucode->id].offset =
980                         ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
981                 rlc_autoload_info[ucode->id].size =
982                         ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
983                                           ucode->size * 4;
984                 ucode++;
985         }
986 }
987
988 static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
989 {
990         uint32_t total_size = 0;
991         SOC24_FIRMWARE_ID id;
992
993         gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
994
995         for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++)
996                 total_size += rlc_autoload_info[id].size;
997
998         /* Offsets in the rlc toc may be aligned (padded), so the summed sizes can be too small */
999         if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset)
1000                 total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
1001                         rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
1002         if (total_size < (RLC_TOC_UMF_SIZE_inM << 20))
1003                 total_size = RLC_TOC_UMF_SIZE_inM << 20;
1004
1005         return total_size;
1006 }
1007
1008 static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1009 {
1010         int r;
1011         uint32_t total_size;
1012
1013         total_size = gfx_v12_0_calc_toc_total_size(adev);
1014
1015         r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1016                                       AMDGPU_GEM_DOMAIN_VRAM,
1017                                       &adev->gfx.rlc.rlc_autoload_bo,
1018                                       &adev->gfx.rlc.rlc_autoload_gpu_addr,
1019                                       (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1020
1021         if (r) {
1022                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1023                 return r;
1024         }
1025
1026         return 0;
1027 }
1028
1029 static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1030                                                        SOC24_FIRMWARE_ID id,
1031                                                        const void *fw_data,
1032                                                        uint32_t fw_size)
1033 {
1034         uint32_t toc_offset;
1035         uint32_t toc_fw_size;
1036         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1037
1038         if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
1039                 return;
1040
1041         toc_offset = rlc_autoload_info[id].offset;
1042         toc_fw_size = rlc_autoload_info[id].size;
1043
1044         if (fw_size == 0)
1045                 fw_size = toc_fw_size;
1046
1047         if (fw_size > toc_fw_size)
1048                 fw_size = toc_fw_size;
1049
1050         memcpy(ptr + toc_offset, fw_data, fw_size);
1051
1052         if (fw_size < toc_fw_size)
1053                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1054 }
1055
1056 static void
1057 gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
1058 {
1059         void *data;
1060         uint32_t size;
1061         uint32_t *toc_ptr;
1062
1063         data = adev->psp.toc.start_addr;
1064         size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
1065
1066         toc_ptr = (uint32_t *)data + size / 4 - 2;
1067         *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;
1068
1069         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
1070                                                    data, size);
1071 }
1072
1073 static void
1074 gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
1075 {
1076         const __le32 *fw_data;
1077         uint32_t fw_size;
1078         const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1079         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1080         const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
1081         const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1082         uint16_t version_major, version_minor;
1083
1084         /* pfp ucode */
1085         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1086                 adev->gfx.pfp_fw->data;
1087         /* instruction */
1088         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1089                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1090         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1091         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP,
1092                                                    fw_data, fw_size);
1093         /* data */
1094         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1095                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1096         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1097         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK,
1098                                                    fw_data, fw_size);
1099         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK,
1100                                                    fw_data, fw_size);
1101         /* me ucode */
1102         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1103                 adev->gfx.me_fw->data;
1104         /* instruction */
1105         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1106                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1107         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1108         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME,
1109                                                    fw_data, fw_size);
1110         /* data */
1111         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1112                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1113         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1114         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK,
1115                                                    fw_data, fw_size);
1116         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK,
1117                                                    fw_data, fw_size);
1118         /* mec ucode */
1119         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1120                 adev->gfx.mec_fw->data;
1121         /* instruction */
1122         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1123                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1124         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1125         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
1126                                                    fw_data, fw_size);
1127         /* data */
1128         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1129                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1130         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1131         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
1132                                                    fw_data, fw_size);
1133         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
1134                                                    fw_data, fw_size);
1135         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
1136                                                    fw_data, fw_size);
1137         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
1138                                                    fw_data, fw_size);
1139
1140         /* rlc ucode */
1141         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1142                 adev->gfx.rlc_fw->data;
1143         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1144                         le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1145         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1146         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
1147                                                    fw_data, fw_size);
1148
1149         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1150         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1151         if (version_major == 2) {
1152                 if (version_minor >= 1) {
1153                         rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1154
1155                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1156                                         le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
1157                         fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
1158                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
1159                                                    fw_data, fw_size);
1160
1161                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1162                                         le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
1163                         fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
1164                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
1165                                                    fw_data, fw_size);
1166                 }
1167                 if (version_minor >= 2) {
1168                         rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1169
1170                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1171                                         le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1172                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1173                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
1174                                                    fw_data, fw_size);
1175
1176                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1177                                         le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1178                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1179                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
1180                                                    fw_data, fw_size);
1181                 }
1182         }
1183 }
1184
1185 static void
1186 gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
1187 {
1188         const __le32 *fw_data;
1189         uint32_t fw_size;
1190         const struct sdma_firmware_header_v3_0 *sdma_hdr;
1191
1192         sdma_hdr = (const struct sdma_firmware_header_v3_0 *)
1193                 adev->sdma.instance[0].fw->data;
1194         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1195                         le32_to_cpu(sdma_hdr->ucode_offset_bytes));
1196         fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes);
1197
1198         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
1199                                                    fw_data, fw_size);
1200 }
1201
1202 static void
1203 gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
1204 {
1205         const __le32 *fw_data;
1206         unsigned fw_size;
1207         const struct mes_firmware_header_v1_0 *mes_hdr;
1208         int pipe, ucode_id, data_id;
1209
1210         for (pipe = 0; pipe < 2; pipe++) {
1211                 if (pipe == 0) {
1212                         ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0;
1213                         data_id  = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK;
1214                 } else {
1215                         ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1;
1216                         data_id  = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK;
1217                 }
1218
1219                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1220                         adev->mes.fw[pipe]->data;
1221
1222                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1223                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1224                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1225
1226                 gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size);
1227
1228                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1229                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1230                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1231
1232                 gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size);
1233         }
1234 }
1235
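     /*
      * RLC backdoor autoload: stage all required firmware images into the
      * autoload buffer, point the IMU bootloader registers at the RLC_G
      * image, then either boot through the IMU (when the IMU is present and
      * DPM is enabled) or unhalt the RLC GPM threads directly.
      */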
1236 static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1237 {
1238         uint32_t rlc_g_offset, rlc_g_size;
1239         uint64_t gpu_addr;
1240         uint32_t data;
1241
1242         /* RLC autoload sequence 2: copy ucode */
1243         gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
1244         gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
1245         gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev);
1246         gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
1247
1248         rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
1249         rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
1250         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
1251
1252         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1253         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1254
1255         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1256
1257         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
1258                 /* RLC autoload sequence 3: load IMU fw */
1259                 if (adev->gfx.imu.funcs->load_microcode)
1260                         adev->gfx.imu.funcs->load_microcode(adev);
1261                 /* RLC autoload sequence 4: init IMU fw */
1262                 if (adev->gfx.imu.funcs->setup_imu)
1263                         adev->gfx.imu.funcs->setup_imu(adev);
1264                 if (adev->gfx.imu.funcs->start_imu)
1265                         adev->gfx.imu.funcs->start_imu(adev);
1266
1267                 /* RLC autoload sequence 5: disable gpa mode */
1268                 gfx_v12_0_disable_gpa_mode(adev);
1269         } else {
1270                 /* unhalt rlc to start autoload without imu */
1271                 data = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1272                 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
1273                 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1274                 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, data);
1275                 WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
1276         }
1277
1278         return 0;
1279 }
1280
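     /*
      * Allocate the register snapshot buffers used by the GFX IP dump
      * support: one array for the core GC registers and one per-instance
      * array each for the compute and gfx queue registers.
      */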
1281 static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
1282 {
1283         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
1284         uint32_t *ptr;
1285         uint32_t inst;
1286
1287         ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1288         if (!ptr) {
1289                 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
1290                 adev->gfx.ip_dump_core = NULL;
1291         } else {
1292                 adev->gfx.ip_dump_core = ptr;
1293         }
1294
1295         /* Allocate memory for compute queue registers for all the instances */
1296         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
1297         inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1298                 adev->gfx.mec.num_queue_per_pipe;
1299
1300         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1301         if (!ptr) {
1302                 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1303                 adev->gfx.ip_dump_compute_queues = NULL;
1304         } else {
1305                 adev->gfx.ip_dump_compute_queues = ptr;
1306         }
1307
1308         /* Allocate memory for gfx queue registers for all the instances */
1309         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
1310         inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1311                 adev->gfx.me.num_queue_per_pipe;
1312
1313         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1314         if (!ptr) {
1315                 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1316                 adev->gfx.ip_dump_gfx_queues = NULL;
1317         } else {
1318                 adev->gfx.ip_dump_gfx_queues = ptr;
1319         }
1320 }
1321
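     /*
      * sw_init: pick the CP topology for the detected GC IP version, hook up
      * the CP interrupt sources, initialize the RLC and MEC BOs, create the
      * gfx and compute rings (plus KIQ when MES KIQ is not used), allocate
      * the MQDs and, if needed, the RLC autoload buffer.
      */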
1322 static int gfx_v12_0_sw_init(void *handle)
1323 {
1324         int i, j, k, r, ring_id = 0;
1325         unsigned num_compute_rings;
1326         int xcc_id = 0;
1327         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1328
1329         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1330         case IP_VERSION(12, 0, 0):
1331         case IP_VERSION(12, 0, 1):
1332                 adev->gfx.me.num_me = 1;
1333                 adev->gfx.me.num_pipe_per_me = 1;
1334                 adev->gfx.me.num_queue_per_pipe = 1;
1335                 adev->gfx.mec.num_mec = 2;
1336                 adev->gfx.mec.num_pipe_per_mec = 2;
1337                 adev->gfx.mec.num_queue_per_pipe = 4;
1338                 break;
1339         default:
1340                 adev->gfx.me.num_me = 1;
1341                 adev->gfx.me.num_pipe_per_me = 1;
1342                 adev->gfx.me.num_queue_per_pipe = 1;
1343                 adev->gfx.mec.num_mec = 1;
1344                 adev->gfx.mec.num_pipe_per_mec = 4;
1345                 adev->gfx.mec.num_queue_per_pipe = 8;
1346                 break;
1347         }
1348
1349         /* recalculate compute rings to use based on hardware configuration */
1350         num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
1351                              adev->gfx.mec.num_queue_per_pipe) / 2;
1352         adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
1353                                           num_compute_rings);
1354
1355         /* EOP Event */
1356         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1357                               GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1358                               &adev->gfx.eop_irq);
1359         if (r)
1360                 return r;
1361
1362         /* Bad opcode Event */
1363         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1364                               GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
1365                               &adev->gfx.bad_op_irq);
1366         if (r)
1367                 return r;
1368
1369         /* Privileged reg */
1370         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1371                               GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1372                               &adev->gfx.priv_reg_irq);
1373         if (r)
1374                 return r;
1375
1376         /* Privileged inst */
1377         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1378                               GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1379                               &adev->gfx.priv_inst_irq);
1380         if (r)
1381                 return r;
1382
1383         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1384
1385         gfx_v12_0_me_init(adev);
1386
1387         r = gfx_v12_0_rlc_init(adev);
1388         if (r) {
1389                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
1390                 return r;
1391         }
1392
1393         r = gfx_v12_0_mec_init(adev);
1394         if (r) {
1395                 dev_err(adev->dev, "Failed to init MEC BOs!\n");
1396                 return r;
1397         }
1398
1399         /* set up the gfx ring */
1400         for (i = 0; i < adev->gfx.me.num_me; i++) {
1401                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1402                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1403                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1404                                         continue;
1405
1406                                 r = gfx_v12_0_gfx_ring_init(adev, ring_id,
1407                                                             i, k, j);
1408                                 if (r)
1409                                         return r;
1410                                 ring_id++;
1411                         }
1412                 }
1413         }
1414
1415         ring_id = 0;
1416         /* set up the compute queues - allocate horizontally across pipes */
1417         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1418                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1419                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1420                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev,
1421                                                                 0, i, k, j))
1422                                         continue;
1423
1424                                 r = gfx_v12_0_compute_ring_init(adev, ring_id,
1425                                                                 i, k, j);
1426                                 if (r)
1427                                         return r;
1428
1429                                 ring_id++;
1430                         }
1431                 }
1432         }
1433
1434         if (!adev->enable_mes_kiq) {
1435                 r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
1436                 if (r) {
1437                         dev_err(adev->dev, "Failed to init KIQ BOs!\n");
1438                         return r;
1439                 }
1440
1441                 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1442                 if (r)
1443                         return r;
1444         }
1445
1446         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_compute_mqd), 0);
1447         if (r)
1448                 return r;
1449
1450         /* allocate visible FB for rlc auto-loading fw */
1451         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1452                 r = gfx_v12_0_rlc_autoload_buffer_init(adev);
1453                 if (r)
1454                         return r;
1455         }
1456
1457         r = gfx_v12_0_gpu_early_init(adev);
1458         if (r)
1459                 return r;
1460
1461         gfx_v12_0_alloc_ip_dump(adev);
1462
1463         return 0;
1464 }
1465
1466 static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev)
1467 {
1468         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1469                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1470                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1471
1472         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1473                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1474                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1475 }
1476
1477 static void gfx_v12_0_me_fini(struct amdgpu_device *adev)
1478 {
1479         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1480                               &adev->gfx.me.me_fw_gpu_addr,
1481                               (void **)&adev->gfx.me.me_fw_ptr);
1482
1483         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1484                                &adev->gfx.me.me_fw_data_gpu_addr,
1485                                (void **)&adev->gfx.me.me_fw_data_ptr);
1486 }
1487
1488 static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1489 {
1490         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1491                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
1492                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1493 }
1494
1495 static int gfx_v12_0_sw_fini(void *handle)
1496 {
1497         int i;
1498         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1499
1500         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1501                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1502         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1503                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1504
1505         amdgpu_gfx_mqd_sw_fini(adev, 0);
1506
1507         if (!adev->enable_mes_kiq) {
1508                 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1509                 amdgpu_gfx_kiq_fini(adev, 0);
1510         }
1511
1512         gfx_v12_0_pfp_fini(adev);
1513         gfx_v12_0_me_fini(adev);
1514         gfx_v12_0_rlc_fini(adev);
1515         gfx_v12_0_mec_fini(adev);
1516
1517         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1518                 gfx_v12_0_rlc_autoload_buffer_fini(adev);
1519
1520         gfx_v12_0_free_microcode(adev);
1521
1522         kfree(adev->gfx.ip_dump_core);
1523         kfree(adev->gfx.ip_dump_compute_queues);
1524         kfree(adev->gfx.ip_dump_gfx_queues);
1525
1526         return 0;
1527 }
1528
1529 static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1530                                    u32 sh_num, u32 instance, int xcc_id)
1531 {
1532         u32 data;
1533
1534         if (instance == 0xffffffff)
1535                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1536                                      INSTANCE_BROADCAST_WRITES, 1);
1537         else
1538                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1539                                      instance);
1540
1541         if (se_num == 0xffffffff)
1542                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1543                                      1);
1544         else
1545                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1546
1547         if (sh_num == 0xffffffff)
1548                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1549                                      1);
1550         else
1551                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1552
1553         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1554 }
1555
1556 static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1557 {
1558         u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1559
1560         gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE);
1561         gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1562                                             GRBM_CC_GC_SA_UNIT_DISABLE,
1563                                             SA_DISABLE);
1564         gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE);
1565         gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1566                                                  GRBM_GC_USER_SA_UNIT_DISABLE,
1567                                                  SA_DISABLE);
1568         sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1569                                             adev->gfx.config.max_shader_engines);
1570
1571         return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1572 }
1573
1574 static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1575 {
1576         u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1577         u32 rb_mask;
1578
1579         gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1580         gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1581                                             CC_RB_BACKEND_DISABLE,
1582                                             BACKEND_DISABLE);
1583         gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1584         gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1585                                                  GC_USER_RB_BACKEND_DISABLE,
1586                                                  BACKEND_DISABLE);
1587         rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1588                                             adev->gfx.config.max_shader_engines);
1589
1590         return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1591 }
1592
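     /*
      * Derive the active render-backend mask: each active SA contributes
      * rb_bitmap_width_per_sa RBs, and the result is combined with the
      * globally active RB bitmap read from the RB_BACKEND_DISABLE registers.
      */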
1593 static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
1594 {
1595         u32 rb_bitmap_width_per_sa;
1596         u32 max_sa;
1597         u32 active_sa_bitmap;
1598         u32 global_active_rb_bitmap;
1599         u32 active_rb_bitmap = 0;
1600         u32 i;
1601
1602         /* query sa bitmap from SA_UNIT_DISABLE registers */
1603         active_sa_bitmap = gfx_v12_0_get_sa_active_bitmap(adev);
1604         /* query rb bitmap from RB_BACKEND_DISABLE registers */
1605         global_active_rb_bitmap = gfx_v12_0_get_rb_active_bitmap(adev);
1606
1607         /* generate active rb bitmap according to active sa bitmap */
1608         max_sa = adev->gfx.config.max_shader_engines *
1609                  adev->gfx.config.max_sh_per_se;
1610         rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1611                                  adev->gfx.config.max_sh_per_se;
1612         for (i = 0; i < max_sa; i++) {
1613                 if (active_sa_bitmap & (1 << i))
1614                         active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
1615         }
1616
1617         active_rb_bitmap |= global_active_rb_bitmap;
1618         adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1619         adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1620 }
1621
1622 #define LDS_APP_BASE           0x1
1623 #define SCRATCH_APP_BASE       0x2
1624
1625 static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev)
1626 {
1627         int i;
1628         uint32_t sh_mem_bases;
1629         uint32_t data;
1630
1631         /*
1632          * Configure apertures:
1633          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1634          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1635          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1636          */
1637         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1638                         SCRATCH_APP_BASE;
1639
1640         mutex_lock(&adev->srbm_mutex);
1641         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1642                 soc24_grbm_select(adev, 0, 0, 0, i);
1643                 /* CP and shaders */
1644                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1645                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1646
1647                 /* Enable trap for each kfd vmid. */
1648                 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1649                 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1650                 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1651         }
1652         soc24_grbm_select(adev, 0, 0, 0, 0);
1653         mutex_unlock(&adev->srbm_mutex);
1654 }
1655
1656 static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev)
1657 {
1658         /* TODO: harvest feature to be added later. */
1659 }
1660
1661 static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev)
1662 {
1663 }
1664
1665 static void gfx_v12_0_constants_init(struct amdgpu_device *adev)
1666 {
1667         u32 tmp;
1668         int i;
1669
1670         if (!amdgpu_sriov_vf(adev))
1671                 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1672
1673         gfx_v12_0_setup_rb(adev);
1674         gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info);
1675         gfx_v12_0_get_tcc_info(adev);
1676         adev->gfx.config.pa_sc_tile_steering_override = 0;
1677
1678         /* XXX SH_MEM regs */
1679         /* where to put LDS, scratch, GPUVM in FSA64 space */
1680         mutex_lock(&adev->srbm_mutex);
1681         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
1682                 soc24_grbm_select(adev, 0, 0, 0, i);
1683                 /* CP and shaders */
1684                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1685                 if (i != 0) {
1686                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1687                                 (adev->gmc.private_aperture_start >> 48));
1688                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1689                                 (adev->gmc.shared_aperture_start >> 48));
1690                         WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1691                 }
1692         }
1693         soc24_grbm_select(adev, 0, 0, 0, 0);
1694
1695         mutex_unlock(&adev->srbm_mutex);
1696
1697         gfx_v12_0_init_compute_vmid(adev);
1698 }
1699
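     /*
      * Only ME0 pipe0 has a ring interrupt-control register handled here;
      * all other me/pipe combinations return 0.
      */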
1700 static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev,
1701                                       int me, int pipe)
1702 {
1703         if (me != 0)
1704                 return 0;
1705
1706         switch (pipe) {
1707         case 0:
1708                 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
1709         default:
1710                 return 0;
1711         }
1712 }
1713
1714 static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev,
1715                                       int me, int pipe)
1716 {
1717         /*
1718          * amdgpu controls only the first MEC. That's why this function only
1719          * handles the setting of interrupts for this specific MEC. All other
1720          * pipes' interrupts are set by amdkfd.
1721          */
1722         if (me != 1)
1723                 return 0;
1724
1725         switch (pipe) {
1726         case 0:
1727                 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
1728         case 1:
1729                 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
1730         default:
1731                 return 0;
1732         }
1733 }
1734
1735 static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1736                                                bool enable)
1737 {
1738         u32 tmp, cp_int_cntl_reg;
1739         int i, j;
1740
1741         if (amdgpu_sriov_vf(adev))
1742                 return;
1743
1744         for (i = 0; i < adev->gfx.me.num_me; i++) {
1745                 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
1746                         cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
1747
1748                         if (cp_int_cntl_reg) {
1749                                 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
1750                                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1751                                                     enable ? 1 : 0);
1752                                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1753                                                     enable ? 1 : 0);
1754                                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1755                                                     enable ? 1 : 0);
1756                                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1757                                                     enable ? 1 : 0);
1758                                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
1759                         }
1760                 }
1761         }
1762 }
1763
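     /*
      * Point the RLC at the clear-state indirect buffer (CSB): regenerate
      * its contents, then program the buffer address and length registers.
      */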
1764 static int gfx_v12_0_init_csb(struct amdgpu_device *adev)
1765 {
1766         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1767
1768         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1769                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1770         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1771                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1772         WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1773
1774         return 0;
1775 }
1776
1777 static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev)
1778 {
1779         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1780
1781         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1782         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1783 }
1784
1785 static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev)
1786 {
1787         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1788         udelay(50);
1789         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1790         udelay(50);
1791 }
1792
1793 static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1794                                              bool enable)
1795 {
1796         uint32_t rlc_pg_cntl;
1797
1798         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1799
1800         if (!enable) {
1801                 /* RLC_PG_CNTL[23] = 0 (default)
1802                  * RLC will wait for handshake acks with SMU
1803                  * GFXOFF will be enabled
1804                  * RLC_PG_CNTL[23] = 1
1805                  * RLC will not issue any message to SMU
1806                  * hence no handshake between SMU & RLC
1807                  * GFXOFF will be disabled
1808                  */
1809                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1810         } else
1811                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1812         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1813 }
1814
1815 static void gfx_v12_0_rlc_start(struct amdgpu_device *adev)
1816 {
1817         /* TODO: re-enable the rlc & smu handshake once smu
1818          * and the gfxoff feature work as expected */
1819         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1820                 gfx_v12_0_rlc_smu_handshake_cntl(adev, false);
1821
1822         WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1823         udelay(50);
1824 }
1825
1826 static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev)
1827 {
1828         uint32_t tmp;
1829
1830         /* enable Save Restore Machine */
1831         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1832         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1833         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1834         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1835 }
1836
1837 static void gfx_v12_0_load_rlcg_microcode(struct amdgpu_device *adev)
1838 {
1839         const struct rlc_firmware_header_v2_0 *hdr;
1840         const __le32 *fw_data;
1841         unsigned i, fw_size;
1842
1843         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1844         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1845                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1846         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1847
1848         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1849                      RLCG_UCODE_LOADING_START_ADDRESS);
1850
1851         for (i = 0; i < fw_size; i++)
1852                 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1853                              le32_to_cpup(fw_data++));
1854
1855         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1856 }
1857
1858 static void gfx_v12_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1859 {
1860         const struct rlc_firmware_header_v2_2 *hdr;
1861         const __le32 *fw_data;
1862         unsigned i, fw_size;
1863         u32 tmp;
1864
1865         hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1866
1867         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1868                         le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1869         fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1870
1871         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1872
1873         for (i = 0; i < fw_size; i++) {
1874                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1875                         msleep(1);
1876                 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1877                                 le32_to_cpup(fw_data++));
1878         }
1879
1880         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1881
1882         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1883                         le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1884         fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1885
1886         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1887         for (i = 0; i < fw_size; i++) {
1888                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1889                         msleep(1);
1890                 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1891                                 le32_to_cpup(fw_data++));
1892         }
1893
1894         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1895
1896         tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1897         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1898         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1899         WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1900 }
1901
1902 static int gfx_v12_0_rlc_load_microcode(struct amdgpu_device *adev)
1903 {
1904         const struct rlc_firmware_header_v2_0 *hdr;
1905         uint16_t version_major;
1906         uint16_t version_minor;
1907
1908         if (!adev->gfx.rlc_fw)
1909                 return -EINVAL;
1910
1911         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1912         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1913
1914         version_major = le16_to_cpu(hdr->header.header_version_major);
1915         version_minor = le16_to_cpu(hdr->header.header_version_minor);
1916
1917         if (version_major == 2) {
1918                 gfx_v12_0_load_rlcg_microcode(adev);
1919                 if (amdgpu_dpm == 1) {
1920                         if (version_minor >= 2)
1921                                 gfx_v12_0_load_rlc_iram_dram_microcode(adev);
1922                 }
1923
1924                 return 0;
1925         }
1926
1927         return -EINVAL;
1928 }
1929
1930 static int gfx_v12_0_rlc_resume(struct amdgpu_device *adev)
1931 {
1932         int r;
1933
1934         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1935                 gfx_v12_0_init_csb(adev);
1936
1937                 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1938                         gfx_v12_0_rlc_enable_srm(adev);
1939         } else {
1940                 if (amdgpu_sriov_vf(adev)) {
1941                         gfx_v12_0_init_csb(adev);
1942                         return 0;
1943                 }
1944
1945                 adev->gfx.rlc.funcs->stop(adev);
1946
1947                 /* disable CG */
1948                 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
1949
1950                 /* disable PG */
1951                 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
1952
1953                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1954                         /* legacy rlc firmware loading */
1955                         r = gfx_v12_0_rlc_load_microcode(adev);
1956                         if (r)
1957                                 return r;
1958                 }
1959
1960                 gfx_v12_0_init_csb(adev);
1961
1962                 adev->gfx.rlc.funcs->start(adev);
1963         }
1964
1965         return 0;
1966 }
1967
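     /*
      * Program the RS64 start addresses for PFP, ME and MEC from their
      * firmware headers, pulsing each pipe's reset bit so the new program
      * counter takes effect.
      */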
1968 static void gfx_v12_0_config_gfx_rs64(struct amdgpu_device *adev)
1969 {
1970         const struct gfx_firmware_header_v2_0 *pfp_hdr;
1971         const struct gfx_firmware_header_v2_0 *me_hdr;
1972         const struct gfx_firmware_header_v2_0 *mec_hdr;
1973         uint32_t pipe_id, tmp;
1974
1975         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
1976                 adev->gfx.mec_fw->data;
1977         me_hdr = (const struct gfx_firmware_header_v2_0 *)
1978                 adev->gfx.me_fw->data;
1979         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
1980                 adev->gfx.pfp_fw->data;
1981
1982         /* config pfp program start addr */
1983         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
1984                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
1985                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
1986                         (pfp_hdr->ucode_start_addr_hi << 30) |
1987                         (pfp_hdr->ucode_start_addr_lo >> 2));
1988                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
1989                         pfp_hdr->ucode_start_addr_hi >> 2);
1990         }
1991         soc24_grbm_select(adev, 0, 0, 0, 0);
1992
1993         /* reset pfp pipe */
1994         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
1995         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
1996         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
1997         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
1998
1999         /* clear pfp pipe reset */
2000         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2001         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2002         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2003
2004         /* config me program start addr */
2005         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2006                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2007                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2008                         (me_hdr->ucode_start_addr_hi << 30) |
2009                         (me_hdr->ucode_start_addr_lo >> 2));
2010                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2011                         me_hdr->ucode_start_addr_hi >> 2);
2012         }
2013         soc24_grbm_select(adev, 0, 0, 0, 0);
2014
2015         /* reset me pipe */
2016         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2017         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2018         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2019         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2020
2021         /* clear me pipe reset */
2022         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2023         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2024         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2025
2026         /* config mec program start addr */
2027         for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2028                 soc24_grbm_select(adev, 1, pipe_id, 0, 0);
2029                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2030                                         mec_hdr->ucode_start_addr_lo >> 2 |
2031                                         mec_hdr->ucode_start_addr_hi << 30);
2032                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2033                                         mec_hdr->ucode_start_addr_hi >> 2);
2034         }
2035         soc24_grbm_select(adev, 0, 0, 0, 0);
2036
2037         /* reset mec pipe */
2038         tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2039         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2040         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2041         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2042         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2043         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2044
2045         /* clear mec pipe reset */
2046         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2047         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2048         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2049         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2050         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2051 }
2052
2053 static void gfx_v12_0_set_pfp_ucode_start_addr(struct amdgpu_device *adev)
2054 {
2055         const struct gfx_firmware_header_v2_0 *cp_hdr;
2056         unsigned pipe_id, tmp;
2057
2058         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2059                 adev->gfx.pfp_fw->data;
2060         mutex_lock(&adev->srbm_mutex);
2061         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2062                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2063                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2064                              (cp_hdr->ucode_start_addr_hi << 30) |
2065                              (cp_hdr->ucode_start_addr_lo >> 2));
2066                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2067                              cp_hdr->ucode_start_addr_hi >> 2);
2068
2069                 /*
2070                  * Program CP_ME_CNTL to reset given PIPE to take
2071                  * effect of CP_PFP_PRGRM_CNTR_START.
2072                  */
2073                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2074                 if (pipe_id == 0)
2075                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2076                                         PFP_PIPE0_RESET, 1);
2077                 else
2078                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2079                                         PFP_PIPE1_RESET, 1);
2080                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2081
2082                 /* Clear pfp pipe reset bit. */
2083                 if (pipe_id == 0)
2084                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2085                                         PFP_PIPE0_RESET, 0);
2086                 else
2087                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2088                                         PFP_PIPE1_RESET, 0);
2089                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2090         }
2091         soc24_grbm_select(adev, 0, 0, 0, 0);
2092         mutex_unlock(&adev->srbm_mutex);
2093 }
2094
2095 static void gfx_v12_0_set_me_ucode_start_addr(struct amdgpu_device *adev)
2096 {
2097         const struct gfx_firmware_header_v2_0 *cp_hdr;
2098         unsigned pipe_id, tmp;
2099
2100         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2101                 adev->gfx.me_fw->data;
2102         mutex_lock(&adev->srbm_mutex);
2103         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2104                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2105                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2106                              (cp_hdr->ucode_start_addr_hi << 30) |
2107                              (cp_hdr->ucode_start_addr_lo >> 2));
2108                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2109                              cp_hdr->ucode_start_addr_hi >> 2);
2110
2111                 /*
2112                  * Program CP_ME_CNTL to reset given PIPE to take
2113                  * effect of CP_ME_PRGRM_CNTR_START.
2114                  */
2115                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2116                 if (pipe_id == 0)
2117                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2118                                         ME_PIPE0_RESET, 1);
2119                 else
2120                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2121                                         ME_PIPE1_RESET, 1);
2122                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2123
2124                 /* Clear me pipe reset bit. */
2125                 if (pipe_id == 0)
2126                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2127                                         ME_PIPE0_RESET, 0);
2128                 else
2129                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2130                                         ME_PIPE1_RESET, 0);
2131                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2132         }
2133         soc24_grbm_select(adev, 0, 0, 0, 0);
2134         mutex_unlock(&adev->srbm_mutex);
2135 }
2136
2137 static void gfx_v12_0_set_mec_ucode_start_addr(struct amdgpu_device *adev)
2138 {
2139         const struct gfx_firmware_header_v2_0 *cp_hdr;
2140         unsigned pipe_id;
2141
2142         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2143                 adev->gfx.mec_fw->data;
2144         mutex_lock(&adev->srbm_mutex);
2145         for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) {
2146                 soc24_grbm_select(adev, 1, pipe_id, 0, 0);
2147                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2148                              cp_hdr->ucode_start_addr_lo >> 2 |
2149                              cp_hdr->ucode_start_addr_hi << 30);
2150                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2151                              cp_hdr->ucode_start_addr_hi >> 2);
2152         }
2153         soc24_grbm_select(adev, 0, 0, 0, 0);
2154         mutex_unlock(&adev->srbm_mutex);
2155 }
2156
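     /*
      * Poll until the CP is idle and RLC_RLCS_BOOTLOAD_STATUS reports
      * BOOTLOAD_COMPLETE, then (for backdoor autoload) program the RS64
      * ucode start addresses for PFP, ME and MEC.
      */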
2157 static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2158 {
2159         uint32_t cp_status;
2160         uint32_t bootload_status;
2161         int i;
2162
2163         for (i = 0; i < adev->usec_timeout; i++) {
2164                 cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2165                 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2166
2167                 if ((cp_status == 0) &&
2168                     (REG_GET_FIELD(bootload_status,
2169                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2170                         break;
2171                 }
2172                 udelay(1);
2173                 if (amdgpu_emu_mode)
2174                         msleep(10);
2175         }
2176
2177         if (i >= adev->usec_timeout) {
2178                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2179                 return -ETIMEDOUT;
2180         }
2181
2182         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2183                 gfx_v12_0_set_pfp_ucode_start_addr(adev);
2184                 gfx_v12_0_set_me_ucode_start_addr(adev);
2185                 gfx_v12_0_set_mec_ucode_start_addr(adev);
2186         }
2187
2188         return 0;
2189 }
2190
2191 static int gfx_v12_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2192 {
2193         int i;
2194         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2195
2196         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2197         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2198         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2199
2200         for (i = 0; i < adev->usec_timeout; i++) {
2201                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2202                         break;
2203                 udelay(1);
2204         }
2205
2206         if (i >= adev->usec_timeout)
2207                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2208
2209         return 0;
2210 }
2211
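     /*
      * Load the RS64 PFP firmware by hand: copy the instruction and data
      * images into 64KB-aligned VRAM BOs, program the instruction- and
      * data-cache base registers, then invalidate and prime the caches
      * before setting the ucode start address.
      */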
2212 static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2213 {
2214         int r;
2215         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2216         const __le32 *fw_ucode, *fw_data;
2217         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2218         uint32_t tmp;
2219         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2220
2221         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2222                 adev->gfx.pfp_fw->data;
2223
2224         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2225
2226         /* instruction */
2227         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2228                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2229         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2230         /* data */
2231         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2232                 le32_to_cpu(pfp_hdr->data_offset_bytes));
2233         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2234
2235         /* 64kb align */
2236         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2237                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2238                                       &adev->gfx.pfp.pfp_fw_obj,
2239                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2240                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2241         if (r) {
2242                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2243                 gfx_v12_0_pfp_fini(adev);
2244                 return r;
2245         }
2246
2247         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2248                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2249                                       &adev->gfx.pfp.pfp_fw_data_obj,
2250                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2251                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2252         if (r) {
2253                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2254                 gfx_v12_0_pfp_fini(adev);
2255                 return r;
2256         }
2257
2258         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2259         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2260
2261         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2262         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2263         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2264         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2265
2266         if (amdgpu_emu_mode == 1)
2267                 adev->hdp.funcs->flush_hdp(adev, NULL);
2268
2269         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2270                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2271         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2272                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2273
2274         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2275         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2276         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2277         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2278         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2279
2280         /*
2281          * Programming any of the CP_PFP_IC_BASE registers
2282          * forces invalidation of the PFP L1 I$. Wait for the
2283          * invalidation to complete.
2284          */
2285         for (i = 0; i < usec_timeout; i++) {
2286                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2287                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2288                         INVALIDATE_CACHE_COMPLETE))
2289                         break;
2290                 udelay(1);
2291         }
2292
2293         if (i >= usec_timeout) {
2294                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2295                 return -EINVAL;
2296         }
2297
2298         /* Prime the L1 instruction caches */
2299         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2300         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2301         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2302         /* Wait for the cache to be primed */
2303         for (i = 0; i < usec_timeout; i++) {
2304                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2305                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2306                         ICACHE_PRIMED))
2307                         break;
2308                 udelay(1);
2309         }
2310
2311         if (i >= usec_timeout) {
2312                 dev_err(adev->dev, "failed to prime instruction cache\n");
2313                 return -EINVAL;
2314         }
2315
2316         mutex_lock(&adev->srbm_mutex);
2317         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2318                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2319
2320                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2321                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2322                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2323                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2324         }
2325         soc24_grbm_select(adev, 0, 0, 0, 0);
2326         mutex_unlock(&adev->srbm_mutex);
2327
2328         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2329         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2330         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2331         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2332
2333         /* Invalidate the data caches */
2334         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2335         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2336         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2337
2338         for (i = 0; i < usec_timeout; i++) {
2339                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2340                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2341                         INVALIDATE_DCACHE_COMPLETE))
2342                         break;
2343                 udelay(1);
2344         }
2345
2346         if (i >= usec_timeout) {
2347                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2348                 return -EINVAL;
2349         }
2350
2351         gfx_v12_0_set_pfp_ucode_start_addr(adev);
2352
2353         return 0;
2354 }
2355
2356 static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2357 {
2358         int r;
2359         const struct gfx_firmware_header_v2_0 *me_hdr;
2360         const __le32 *fw_ucode, *fw_data;
2361         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2362         uint32_t tmp;
2363         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2364
2365         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2366                 adev->gfx.me_fw->data;
2367
2368         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2369
2370         /* instruction */
2371         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2372                 le32_to_cpu(me_hdr->ucode_offset_bytes));
2373         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2374         /* data */
2375         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2376                 le32_to_cpu(me_hdr->data_offset_bytes));
2377         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2378
2379         /* 64kb align */
2380         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2381                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2382                                       &adev->gfx.me.me_fw_obj,
2383                                       &adev->gfx.me.me_fw_gpu_addr,
2384                                       (void **)&adev->gfx.me.me_fw_ptr);
2385         if (r) {
2386                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2387                 gfx_v12_0_me_fini(adev);
2388                 return r;
2389         }
2390
2391         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2392                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2393                                       &adev->gfx.me.me_fw_data_obj,
2394                                       &adev->gfx.me.me_fw_data_gpu_addr,
2395                                       (void **)&adev->gfx.me.me_fw_data_ptr);
2396         if (r) {
2397                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2398                 gfx_v12_0_me_fini(adev);
2399                 return r;
2400         }
2401
2402         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2403         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2404
2405         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2406         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2407         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2408         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2409
2410         if (amdgpu_emu_mode == 1)
2411                 adev->hdp.funcs->flush_hdp(adev, NULL);
2412
2413         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2414                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2415         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2416                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2417
2418         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2419         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2420         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2421         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2422         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2423
2424         /*
2425          * Programming any of the CP_ME_IC_BASE registers
2426          * forces invalidation of the ME L1 I$. Wait for the
2427          * invalidation to complete.
2428          */
2429         for (i = 0; i < usec_timeout; i++) {
2430                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2431                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2432                         INVALIDATE_CACHE_COMPLETE))
2433                         break;
2434                 udelay(1);
2435         }
2436
2437         if (i >= usec_timeout) {
2438                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2439                 return -EINVAL;
2440         }
2441
2442         /* Prime the instruction caches */
2443         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2444         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2445         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2446
2447         /* Wait for the instruction cache to be primed */
2448         for (i = 0; i < usec_timeout; i++) {
2449                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2450                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2451                         ICACHE_PRIMED))
2452                         break;
2453                 udelay(1);
2454         }
2455
2456         if (i >= usec_timeout) {
2457                 dev_err(adev->dev, "failed to prime instruction cache\n");
2458                 return -EINVAL;
2459         }
2460
2461         mutex_lock(&adev->srbm_mutex);
2462         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2463                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2464
2465                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2466                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2467                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2468                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2469         }
2470         soc24_grbm_select(adev, 0, 0, 0, 0);
2471         mutex_unlock(&adev->srbm_mutex);
2472
2473         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2474         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2475         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2476         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2477
2478         /* Invalidate the data caches */
2479         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2480         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2481         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2482
2483         for (i = 0; i < usec_timeout; i++) {
2484                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2485                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2486                         INVALIDATE_DCACHE_COMPLETE))
2487                         break;
2488                 udelay(1);
2489         }
2490
2491         if (i >= usec_timeout) {
2492                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2493                 return -EINVAL;
2494         }
2495
2496         gfx_v12_0_set_me_ucode_start_addr(adev);
2497
2498         return 0;
2499 }
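
/*
 * Illustrative sketch (not part of the driver): the microcode-load paths
 * above all use the same poll-with-timeout idiom -- re-read a status
 * register until the completion field reads back 1, sleeping 1 us per
 * iteration, and treat loop exhaustion as an error.  The helper below is a
 * hypothetical generalization of that idiom; read_complete() stands in for
 * the RREG32_SOC15()/REG_GET_FIELD() pair and is not a real API.
 */
#if 0
static int example_poll_for_completion(bool (*read_complete)(void),
				       u32 usec_timeout)
{
	u32 i;

	for (i = 0; i < usec_timeout; i++) {
		if (read_complete())
			return 0;	/* completion bit observed */
		udelay(1);
	}

	return -ETIMEDOUT;	/* the callers above return -EINVAL instead */
}
#endif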
2500
2501 static int gfx_v12_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2502 {
2503         int r;
2504
2505         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
2506                 return -EINVAL;
2507
2508         gfx_v12_0_cp_gfx_enable(adev, false);
2509
2510         r = gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(adev);
2511         if (r) {
2512                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
2513                 return r;
2514         }
2515
2516         r = gfx_v12_0_cp_gfx_load_me_microcode_rs64(adev);
2517         if (r) {
2518                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
2519                 return r;
2520         }
2521
2522         return 0;
2523 }
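
/*
 * Illustrative sketch (not part of the driver): the register programming
 * above follows one read-modify-write pattern -- read the register, update
 * individual fields with REG_SET_FIELD(), then write it back.  The
 * hypothetical helper below shows the underlying mask/shift arithmetic for
 * a single field given its MASK and SHIFT constants; it is not the actual
 * macro definition.
 */
#if 0
static u32 example_set_reg_field(u32 reg_val, u32 mask, u32 shift,
				 u32 field_val)
{
	return (reg_val & ~mask) | ((field_val << shift) & mask);
}
#endif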
2524
2525 static int gfx_v12_0_cp_gfx_start(struct amdgpu_device *adev)
2526 {
2527         /* init the CP */
2528         WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
2529                      adev->gfx.config.max_hw_contexts - 1);
2530         WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
2531
2532         if (!amdgpu_async_gfx_ring)
2533                 gfx_v12_0_cp_gfx_enable(adev, true);
2534
2535         return 0;
2536 }
2537
2538 static void gfx_v12_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
2539                                          CP_PIPE_ID pipe)
2540 {
2541         u32 tmp;
2542
2543         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
2544         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
2545
2546         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
2547 }
2548
2549 static void gfx_v12_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
2550                                           struct amdgpu_ring *ring)
2551 {
2552         u32 tmp;
2553
2554         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
2555         if (ring->use_doorbell) {
2556                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2557                                     DOORBELL_OFFSET, ring->doorbell_index);
2558                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2559                                     DOORBELL_EN, 1);
2560         } else {
2561                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2562                                     DOORBELL_EN, 0);
2563         }
2564         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
2565
2566         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2567                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
2568         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
2569
2570         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
2571                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2572 }
2573
2574 static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
2575 {
2576         struct amdgpu_ring *ring;
2577         u32 tmp;
2578         u32 rb_bufsz;
2579         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2580         u32 i;
2581
2582         /* Set the write pointer delay */
2583         WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
2584
2585         /* set the RB to use vmid 0 */
2586         WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
2587
2588         /* Init gfx ring 0 for pipe 0 */
2589         mutex_lock(&adev->srbm_mutex);
2590         gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
2591
2592         /* Set ring buffer size */
2593         ring = &adev->gfx.gfx_ring[0];
2594         rb_bufsz = order_base_2(ring->ring_size / 8);
2595         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2596         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2597         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
2598
2599         /* Initialize the ring buffer's write pointers */
2600         ring->wptr = 0;
2601         WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
2602         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2603
2604         /* set the wb address whether it's enabled or not */
2605         rptr_addr = ring->rptr_gpu_addr;
2606         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2607         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
2608                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2609
2610         wptr_gpu_addr = ring->wptr_gpu_addr;
2611         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
2612                      lower_32_bits(wptr_gpu_addr));
2613         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
2614                      upper_32_bits(wptr_gpu_addr));
2615
2616         mdelay(1);
2617         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
2618
2619         rb_addr = ring->gpu_addr >> 8;
2620         WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
2621         WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2622
2623         WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
2624
2625         gfx_v12_0_cp_gfx_set_doorbell(adev, ring);
2626         mutex_unlock(&adev->srbm_mutex);
2627
2628         /* Switch to pipe 0 */
2629         mutex_lock(&adev->srbm_mutex);
2630         gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
2631         mutex_unlock(&adev->srbm_mutex);
2632
2633         /* start the ring */
2634         gfx_v12_0_cp_gfx_start(adev);
2635
2636         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2637                 ring = &adev->gfx.gfx_ring[i];
2638                 ring->sched.ready = true;
2639         }
2640
2641         return 0;
2642 }
2643
2644 static void gfx_v12_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2645 {
2646         u32 data;
2647
2648         data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2649         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
2650                                                  enable ? 0 : 1);
2651         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
2652                                                  enable ? 0 : 1);
2653         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
2654                                                  enable ? 0 : 1);
2655         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
2656                                                  enable ? 0 : 1);
2657         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
2658                                                  enable ? 0 : 1);
2659         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
2660                                                  enable ? 1 : 0);
2661         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
2662                                                  enable ? 1 : 0);
2663         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
2664                                                  enable ? 1 : 0);
2665         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
2666                                                  enable ? 1 : 0);
2667         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
2668                                                  enable ? 0 : 1);
2669         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
2670
2671         adev->gfx.kiq[0].ring.sched.ready = enable;
2672
2673         udelay(50);
2674 }
2675
2676 static int gfx_v12_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
2677 {
2678         const struct gfx_firmware_header_v2_0 *mec_hdr;
2679         const __le32 *fw_ucode, *fw_data;
2680         u32 tmp, fw_ucode_size, fw_data_size;
2681         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
2682         u32 *fw_ucode_ptr, *fw_data_ptr;
2683         int r;
2684
2685         if (!adev->gfx.mec_fw)
2686                 return -EINVAL;
2687
2688         gfx_v12_0_cp_compute_enable(adev, false);
2689
2690         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
2691         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2692
2693         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
2694                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
2695         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
2696
2697         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
2698                                 le32_to_cpu(mec_hdr->data_offset_bytes));
2699         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
2700
2701         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2702                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2703                                       &adev->gfx.mec.mec_fw_obj,
2704                                       &adev->gfx.mec.mec_fw_gpu_addr,
2705                                       (void **)&fw_ucode_ptr);
2706         if (r) {
2707                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
2708                 gfx_v12_0_mec_fini(adev);
2709                 return r;
2710         }
2711
2712         r = amdgpu_bo_create_reserved(adev,
2713                                       ALIGN(fw_data_size, 64 * 1024) *
2714                                       adev->gfx.mec.num_pipe_per_mec,
2715                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2716                                       &adev->gfx.mec.mec_fw_data_obj,
2717                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
2718                                       (void **)&fw_data_ptr);
2719         if (r) {
2720                 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
2721                 gfx_v12_0_mec_fini(adev);
2722                 return r;
2723         }
2724
2725         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
2726         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2727                 memcpy(fw_data_ptr + i * ALIGN(fw_data_size, 64 * 1024) / 4, fw_data, fw_data_size);
2728         }
2729
2730         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2731         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
2732         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2733         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
2734
2735         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2736         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2737         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2738         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2739         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2740
2741         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2742         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2743         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2744         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2745
2746         mutex_lock(&adev->srbm_mutex);
2747         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2748                 soc24_grbm_select(adev, 1, i, 0, 0);
2749
2750                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
2751                              lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2752                                            i * ALIGN(fw_data_size, 64 * 1024)));
2753                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2754                              upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2755                                            i * ALIGN(fw_data_size, 64 * 1024)));
2756
2757                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2758                              lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2759                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2760                              upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2761         }
2762         soc24_grbm_select(adev, 0, 0, 0, 0);
2763         mutex_unlock(&adev->srbm_mutex);
2764
2765         /* Trigger an invalidation of the MEC L1 data cache */
2766         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2767         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2768         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2769
2770         /* Wait for invalidation complete */
2771         for (i = 0; i < usec_timeout; i++) {
2772                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2773                 if (REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2774                                   INVALIDATE_DCACHE_COMPLETE) == 1)
2775                         break;
2776                 udelay(1);
2777         }
2778
2779         if (i >= usec_timeout) {
2780                 dev_err(adev->dev, "failed to invalidate MEC data cache\n");
2781                 return -EINVAL;
2782         }
2783
2784         /* Trigger an invalidation of the L1 instruction caches */
2785         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2786         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2787         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2788
2789         /* Wait for invalidation complete */
2790         for (i = 0; i < usec_timeout; i++) {
2791                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2792                 if (REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2793                                   INVALIDATE_CACHE_COMPLETE) == 1)
2794                         break;
2795                 udelay(1);
2796         }
2797
2798         if (i >= usec_timeout) {
2799                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2800                 return -EINVAL;
2801         }
2802
2803         gfx_v12_0_set_mec_ucode_start_addr(adev);
2804
2805         return 0;
2806 }
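
/*
 * Illustrative sketch (not part of the driver): the MEC data BO created
 * above holds one copy of the RS64 data image per compute pipe, each copy
 * padded out to a 64 KiB boundary.  The hypothetical helper below restates
 * the address arithmetic used for CP_MEC_MDBASE_LO/_HI in the loop above.
 */
#if 0
static u64 example_mec_data_slice_addr(u64 data_bo_gpu_addr,
				       u32 fw_data_size, u32 pipe)
{
	/* each pipe gets its own 64 KiB-aligned slice of the data image */
	return data_bo_gpu_addr + (u64)pipe * ALIGN(fw_data_size, 64 * 1024);
}
#endif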
2807
2808 static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
2809 {
2810         uint32_t tmp;
2811         struct amdgpu_device *adev = ring->adev;
2812
2813         /* tell the RLC which queue is the KIQ */
2814         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
2815         tmp &= 0xffffff00;
2816         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2817         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
2818         tmp |= 0x80;
2819         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
2820 }
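
/*
 * Illustrative sketch (not part of the driver): gfx_v12_0_kiq_setting()
 * packs the KIQ location into the low byte of RLC_CP_SCHEDULERS as
 * (me << 5) | (pipe << 3) | queue and then sets bit 7 with a second write.
 * The layout below is inferred purely from that code, not from a register
 * specification.
 */
#if 0
static u32 example_kiq_scheduler_byte(u32 me, u32 pipe, u32 queue)
{
	u32 val = (me << 5) | (pipe << 3) | queue;

	return val | 0x80;	/* bit 7 is set by the second write above */
}
#endif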
2821
2822 static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
2823 {
2824         /* set graphics engine doorbell range */
2825         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
2826                      (adev->doorbell_index.gfx_ring0 * 2) << 2);
2827         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
2828                      (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
2829
2830         /* set compute engine doorbell range */
2831         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
2832                      (adev->doorbell_index.kiq * 2) << 2);
2833         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
2834                      (adev->doorbell_index.userqueue_end * 2) << 2);
2835 }
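
/*
 * Illustrative sketch (not part of the driver): the doorbell range
 * registers above take byte offsets, while the driver's doorbell indices
 * count 64-bit doorbells.  The conversion used above is (index * 2) << 2:
 * two dwords per 64-bit doorbell, four bytes per dword.  The helper below
 * is a hypothetical restatement of that conversion.
 */
#if 0
static u32 example_doorbell_index_to_byte_offset(u32 doorbell_index)
{
	return (doorbell_index * 2) << 2;	/* qwords -> dwords -> bytes */
}
#endif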
2836
2837 static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
2838                                   struct amdgpu_mqd_prop *prop)
2839 {
2840         struct v12_gfx_mqd *mqd = m;
2841         uint64_t hqd_gpu_addr, wb_gpu_addr;
2842         uint32_t tmp;
2843         uint32_t rb_bufsz;
2844
2845         /* set up gfx hqd wptr */
2846         mqd->cp_gfx_hqd_wptr = 0;
2847         mqd->cp_gfx_hqd_wptr_hi = 0;
2848
2849         /* set the pointer to the MQD */
2850         mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
2851         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
2852
2853         /* set up mqd control */
2854         tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
2855         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
2856         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
2857         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
2858         mqd->cp_gfx_mqd_control = tmp;
2859
2860         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
2861         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
2862         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
2863         mqd->cp_gfx_hqd_vmid = 0;
2864
2865         /* set up default queue priority level
2866          * 0x0 = low priority, 0x1 = high priority */
2867         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
2868         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
2869         mqd->cp_gfx_hqd_queue_priority = tmp;
2870
2871         /* set up time quantum */
2872         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
2873         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
2874         mqd->cp_gfx_hqd_quantum = tmp;
2875
2876         /* set up gfx hqd base. this is similar to CP_RB_BASE */
2877         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
2878         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
2879         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
2880
2881         /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
2882         wb_gpu_addr = prop->rptr_gpu_addr;
2883         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
2884         mqd->cp_gfx_hqd_rptr_addr_hi =
2885                 upper_32_bits(wb_gpu_addr) & 0xffff;
2886
2887         /* set up rb_wptr_poll addr */
2888         wb_gpu_addr = prop->wptr_gpu_addr;
2889         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2890         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2891
2892         /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
2893         rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
2894         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
2895         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
2896         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
2897 #ifdef __BIG_ENDIAN
2898         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
2899 #endif
2900         mqd->cp_gfx_hqd_cntl = tmp;
2901
2902         /* set up cp_doorbell_control */
2903         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
2904         if (prop->use_doorbell) {
2905                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2906                                     DOORBELL_OFFSET, prop->doorbell_index);
2907                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2908                                     DOORBELL_EN, 1);
2909         } else {
2910                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2911                                     DOORBELL_EN, 0);
        }
2912         mqd->cp_rb_doorbell_control = tmp;
2913
2914         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2915         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
2916
2917         /* activate the queue */
2918         mqd->cp_gfx_hqd_active = 1;
2919
2920         return 0;
2921 }
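
/*
 * Illustrative sketch (not part of the driver): CP_GFX_HQD_CNTL.RB_BUFSZ
 * above is computed as order_base_2(queue_size / 4) - 1, i.e. log2 of the
 * ring size in dwords minus one.  For a 64 KiB ring that gives
 * order_base_2(16384) - 1 = 13.  The helper below restates the
 * computation; the field's exact hardware interpretation is inferred from
 * the code, not from a register specification.
 */
#if 0
static u32 example_gfx_hqd_rb_bufsz(u32 queue_size_bytes)
{
	return order_base_2(queue_size_bytes / 4) - 1;
}
#endif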
2922
2923 static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
2924 {
2925         struct amdgpu_device *adev = ring->adev;
2926         struct v12_gfx_mqd *mqd = ring->mqd_ptr;
2927         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
2928
2929         if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
2930                 memset((void *)mqd, 0, sizeof(*mqd));
2931                 mutex_lock(&adev->srbm_mutex);
2932                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2933                 amdgpu_ring_init_mqd(ring);
2934                 soc24_grbm_select(adev, 0, 0, 0, 0);
2935                 mutex_unlock(&adev->srbm_mutex);
2936                 if (adev->gfx.me.mqd_backup[mqd_idx])
2937                         memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
2938         } else {
2939                 /* restore mqd with the backup copy */
2940                 if (adev->gfx.me.mqd_backup[mqd_idx])
2941                         memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
2942                 /* reset the ring */
2943                 ring->wptr = 0;
2944                 *ring->wptr_cpu_addr = 0;
2945                 amdgpu_ring_clear_ring(ring);
2946         }
2947
2948         return 0;
2949 }
2950
2951 static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
2952 {
2953         int r, i;
2954         struct amdgpu_ring *ring;
2955
2956         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2957                 ring = &adev->gfx.gfx_ring[i];
2958
2959                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
2960                 if (unlikely(r != 0))
2961                         goto done;
2962
2963                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2964                 if (!r) {
2965                         r = gfx_v12_0_kgq_init_queue(ring, false);
2966                         amdgpu_bo_kunmap(ring->mqd_obj);
2967                         ring->mqd_ptr = NULL;
2968                 }
2969                 amdgpu_bo_unreserve(ring->mqd_obj);
2970                 if (r)
2971                         goto done;
2972         }
2973
2974         r = amdgpu_gfx_enable_kgq(adev, 0);
2975         if (r)
2976                 goto done;
2977
2978         r = gfx_v12_0_cp_gfx_start(adev);
2979         if (r)
2980                 goto done;
2981
2982         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2983                 ring = &adev->gfx.gfx_ring[i];
2984                 ring->sched.ready = true;
2985         }
2986 done:
2987         return r;
2988 }
2989
2990 static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
2991                                       struct amdgpu_mqd_prop *prop)
2992 {
2993         struct v12_compute_mqd *mqd = m;
2994         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2995         uint32_t tmp;
2996
2997         mqd->header = 0xC0310800;
2998         mqd->compute_pipelinestat_enable = 0x00000001;
2999         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3000         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3001         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3002         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3003         mqd->compute_misc_reserved = 0x00000007;
3004
3005         eop_base_addr = prop->eop_gpu_addr >> 8;
3006         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3007         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3008
3009         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3010         tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3011         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3012                         (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
3013
3014         mqd->cp_hqd_eop_control = tmp;
3015
3016         /* enable doorbell? */
3017         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3018
3019         if (prop->use_doorbell) {
3020                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3021                                     DOORBELL_OFFSET, prop->doorbell_index);
3022                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3023                                     DOORBELL_EN, 1);
3024                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3025                                     DOORBELL_SOURCE, 0);
3026                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3027                                     DOORBELL_HIT, 0);
3028         } else {
3029                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3030                                     DOORBELL_EN, 0);
3031         }
3032
3033         mqd->cp_hqd_pq_doorbell_control = tmp;
3034
3035         /* disable the queue if it's active */
3036         mqd->cp_hqd_dequeue_request = 0;
3037         mqd->cp_hqd_pq_rptr = 0;
3038         mqd->cp_hqd_pq_wptr_lo = 0;
3039         mqd->cp_hqd_pq_wptr_hi = 0;
3040
3041         /* set the pointer to the MQD */
3042         mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3043         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3044
3045         /* set MQD vmid to 0 */
3046         tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3047         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3048         mqd->cp_mqd_control = tmp;
3049
3050         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3051         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3052         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3053         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3054
3055         /* set up the HQD, this is similar to CP_RB0_CNTL */
3056         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3057         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3058                             (order_base_2(prop->queue_size / 4) - 1));
3059         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3060                             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3061         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
3062         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3063         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3064         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3065         mqd->cp_hqd_pq_control = tmp;
3066
3067         /* set the wb address whether it's enabled or not */
3068         wb_gpu_addr = prop->rptr_gpu_addr;
3069         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3070         mqd->cp_hqd_pq_rptr_report_addr_hi =
3071                 upper_32_bits(wb_gpu_addr) & 0xffff;
3072
3073         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3074         wb_gpu_addr = prop->wptr_gpu_addr;
3075         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3076         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3077
3078         tmp = 0;
3079         /* enable the doorbell if requested */
3080         if (prop->use_doorbell) {
3081                 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3082                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3083                                 DOORBELL_OFFSET, prop->doorbell_index);
3084
3085                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3086                                     DOORBELL_EN, 1);
3087                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3088                                     DOORBELL_SOURCE, 0);
3089                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3090                                     DOORBELL_HIT, 0);
3091         }
3092
3093         mqd->cp_hqd_pq_doorbell_control = tmp;
3094
3095         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3096         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3097
3098         /* set the vmid for the queue */
3099         mqd->cp_hqd_vmid = 0;
3100
3101         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3102         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3103         mqd->cp_hqd_persistent_state = tmp;
3104
3105         /* set MIN_IB_AVAIL_SIZE */
3106         tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3107         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3108         mqd->cp_hqd_ib_control = tmp;
3109
3110         /* set static priority for a compute queue/ring */
3111         mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3112         mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3113
3114         mqd->cp_hqd_active = prop->hqd_active;
3115
3116         return 0;
3117 }
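
/*
 * Illustrative sketch (not part of the driver): per the comment in
 * gfx_v12_0_compute_mqd_init(), CP_HQD_EOP_CONTROL.EOP_SIZE describes an
 * EOP buffer of 2^(EOP_SIZE + 1) dwords.  With GFX12_MEC_HPD_SIZE of
 * 2048 bytes (512 dwords) the driver programs
 * order_base_2(2048 / 4) - 1 = 8, and 2^(8 + 1) = 512 dwords matches.
 * The helper below restates that encoding.
 */
#if 0
static u32 example_hqd_eop_size_field(u32 hpd_size_bytes)
{
	return order_base_2(hpd_size_bytes / 4) - 1;
}
#endif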
3118
3119 static int gfx_v12_0_kiq_init_register(struct amdgpu_ring *ring)
3120 {
3121         struct amdgpu_device *adev = ring->adev;
3122         struct v12_compute_mqd *mqd = ring->mqd_ptr;
3123         int j;
3124
3125         /* deactivate the queue */
3126         if (amdgpu_sriov_vf(adev))
3127                 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3128
3129         /* disable wptr polling */
3130         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3131
3132         /* write the EOP addr */
3133         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3134                mqd->cp_hqd_eop_base_addr_lo);
3135         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3136                mqd->cp_hqd_eop_base_addr_hi);
3137
3138         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3139         WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3140                mqd->cp_hqd_eop_control);
3141
3142         /* enable doorbell? */
3143         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3144                mqd->cp_hqd_pq_doorbell_control);
3145
3146         /* disable the queue if it's active */
3147         if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3148                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3149                 for (j = 0; j < adev->usec_timeout; j++) {
3150                         if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3151                                 break;
3152                         udelay(1);
3153                 }
3154                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3155                        mqd->cp_hqd_dequeue_request);
3156                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3157                        mqd->cp_hqd_pq_rptr);
3158                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3159                        mqd->cp_hqd_pq_wptr_lo);
3160                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3161                        mqd->cp_hqd_pq_wptr_hi);
3162         }
3163
3164         /* set the pointer to the MQD */
3165         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3166                mqd->cp_mqd_base_addr_lo);
3167         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3168                mqd->cp_mqd_base_addr_hi);
3169
3170         /* set MQD vmid to 0 */
3171         WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3172                mqd->cp_mqd_control);
3173
3174         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3175         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3176                mqd->cp_hqd_pq_base_lo);
3177         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3178                mqd->cp_hqd_pq_base_hi);
3179
3180         /* set up the HQD, this is similar to CP_RB0_CNTL */
3181         WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3182                mqd->cp_hqd_pq_control);
3183
3184         /* set the wb address whether it's enabled or not */
3185         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3186                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3187         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3188                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3189
3190         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3191         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3192                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3193         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3194                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3195
3196         /* enable the doorbell if requested */
3197         if (ring->use_doorbell) {
3198                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3199                         (adev->doorbell_index.kiq * 2) << 2);
3200                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3201                         (adev->doorbell_index.userqueue_end * 2) << 2);
3202         }
3203
3204         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3205                mqd->cp_hqd_pq_doorbell_control);
3206
3207         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3208         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3209                mqd->cp_hqd_pq_wptr_lo);
3210         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3211                mqd->cp_hqd_pq_wptr_hi);
3212
3213         /* set the vmid for the queue */
3214         WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3215
3216         WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3217                mqd->cp_hqd_persistent_state);
3218
3219         /* activate the queue */
3220         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3221                mqd->cp_hqd_active);
3222
3223         if (ring->use_doorbell)
3224                 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3225
3226         return 0;
3227 }
3228
3229 static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring)
3230 {
3231         struct amdgpu_device *adev = ring->adev;
3232         struct v12_compute_mqd *mqd = ring->mqd_ptr;
3233         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3234
3235         gfx_v12_0_kiq_setting(ring);
3236
3237         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3238                 /* reset MQD to a clean status */
3239                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3240                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3241
3242                 /* reset ring buffer */
3243                 ring->wptr = 0;
3244                 amdgpu_ring_clear_ring(ring);
3245
3246                 mutex_lock(&adev->srbm_mutex);
3247                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3248                 gfx_v12_0_kiq_init_register(ring);
3249                 soc24_grbm_select(adev, 0, 0, 0, 0);
3250                 mutex_unlock(&adev->srbm_mutex);
3251         } else {
3252                 memset((void *)mqd, 0, sizeof(*mqd));
3253                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3254                         amdgpu_ring_clear_ring(ring);
3255                 mutex_lock(&adev->srbm_mutex);
3256                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3257                 amdgpu_ring_init_mqd(ring);
3258                 gfx_v12_0_kiq_init_register(ring);
3259                 soc24_grbm_select(adev, 0, 0, 0, 0);
3260                 mutex_unlock(&adev->srbm_mutex);
3261
3262                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3263                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3264         }
3265
3266         return 0;
3267 }
3268
3269 static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
3270 {
3271         struct amdgpu_device *adev = ring->adev;
3272         struct v12_compute_mqd *mqd = ring->mqd_ptr;
3273         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3274
3275         if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
3276                 memset((void *)mqd, 0, sizeof(*mqd));
3277                 mutex_lock(&adev->srbm_mutex);
3278                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3279                 amdgpu_ring_init_mqd(ring);
3280                 soc24_grbm_select(adev, 0, 0, 0, 0);
3281                 mutex_unlock(&adev->srbm_mutex);
3282
3283                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3284                         memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3285         } else {
3286                 /* restore MQD to a clean status */
3287                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3288                         memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3289                 /* reset ring buffer */
3290                 ring->wptr = 0;
3291                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3292                 amdgpu_ring_clear_ring(ring);
3293         }
3294
3295         return 0;
3296 }
3297
3298 static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev)
3299 {
3300         struct amdgpu_ring *ring;
3301         int r;
3302
3303         ring = &adev->gfx.kiq[0].ring;
3304
3305         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3306         if (unlikely(r != 0))
3307                 return r;
3308
3309         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3310         if (unlikely(r != 0)) {
3311                 amdgpu_bo_unreserve(ring->mqd_obj);
3312                 return r;
3313         }
3314
3315         gfx_v12_0_kiq_init_queue(ring);
3316         amdgpu_bo_kunmap(ring->mqd_obj);
3317         ring->mqd_ptr = NULL;
3318         amdgpu_bo_unreserve(ring->mqd_obj);
3319         ring->sched.ready = true;
3320         return 0;
3321 }
3322
3323 static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
3324 {
3325         struct amdgpu_ring *ring = NULL;
3326         int r = 0, i;
3327
3328         if (!amdgpu_async_gfx_ring)
3329                 gfx_v12_0_cp_compute_enable(adev, true);
3330
3331         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3332                 ring = &adev->gfx.compute_ring[i];
3333
3334                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3335                 if (unlikely(r != 0))
3336                         goto done;
3337                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3338                 if (!r) {
3339                         r = gfx_v12_0_kcq_init_queue(ring, false);
3340                         amdgpu_bo_kunmap(ring->mqd_obj);
3341                         ring->mqd_ptr = NULL;
3342                 }
3343                 amdgpu_bo_unreserve(ring->mqd_obj);
3344                 if (r)
3345                         goto done;
3346         }
3347
3348         r = amdgpu_gfx_enable_kcq(adev, 0);
3349 done:
3350         return r;
3351 }
3352
3353 static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
3354 {
3355         int r, i;
3356         struct amdgpu_ring *ring;
3357
3358         if (!(adev->flags & AMD_IS_APU))
3359                 gfx_v12_0_enable_gui_idle_interrupt(adev, false);
3360
3361         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
3362                 /* legacy firmware loading */
3363                 r = gfx_v12_0_cp_gfx_load_microcode(adev);
3364                 if (r)
3365                         return r;
3366
3367                 r = gfx_v12_0_cp_compute_load_microcode_rs64(adev);
3368                 if (r)
3369                         return r;
3370         }
3371
3372         gfx_v12_0_cp_set_doorbell_range(adev);
3373
3374         if (amdgpu_async_gfx_ring) {
3375                 gfx_v12_0_cp_compute_enable(adev, true);
3376                 gfx_v12_0_cp_gfx_enable(adev, true);
3377         }
3378
3379         if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
3380                 r = amdgpu_mes_kiq_hw_init(adev);
3381         else
3382                 r = gfx_v12_0_kiq_resume(adev);
3383         if (r)
3384                 return r;
3385
3386         r = gfx_v12_0_kcq_resume(adev);
3387         if (r)
3388                 return r;
3389
3390         if (!amdgpu_async_gfx_ring) {
3391                 r = gfx_v12_0_cp_gfx_resume(adev);
3392                 if (r)
3393                         return r;
3394         } else {
3395                 r = gfx_v12_0_cp_async_gfx_ring_resume(adev);
3396                 if (r)
3397                         return r;
3398         }
3399
3400         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3401                 ring = &adev->gfx.gfx_ring[i];
3402                 r = amdgpu_ring_test_helper(ring);
3403                 if (r)
3404                         return r;
3405         }
3406
3407         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3408                 ring = &adev->gfx.compute_ring[i];
3409                 r = amdgpu_ring_test_helper(ring);
3410                 if (r)
3411                         return r;
3412         }
3413
3414         return 0;
3415 }
3416
3417 static void gfx_v12_0_cp_enable(struct amdgpu_device *adev, bool enable)
3418 {
3419         gfx_v12_0_cp_gfx_enable(adev, enable);
3420         gfx_v12_0_cp_compute_enable(adev, enable);
3421 }
3422
3423 static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
3424 {
3425         int r;
3426         bool value;
3427
3428         r = adev->gfxhub.funcs->gart_enable(adev);
3429         if (r)
3430                 return r;
3431
3432         adev->hdp.funcs->flush_hdp(adev, NULL);
3433
3434         value = (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS);
3436
3437         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
3438         /* TODO: investigate why this and the hdp flush above are needed;
3439          * are we missing a flush somewhere else? */
3440         adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
3441
3442         return 0;
3443 }
3444
3445 static int get_gb_addr_config(struct amdgpu_device *adev)
3446 {
3447         u32 gb_addr_config;
3448
3449         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
3450         if (gb_addr_config == 0)
3451                 return -EINVAL;
3452
3453         adev->gfx.config.gb_addr_config_fields.num_pkrs =
3454                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
3455
3456         adev->gfx.config.gb_addr_config = gb_addr_config;
3457
3458         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
3459                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3460                                       GB_ADDR_CONFIG, NUM_PIPES);
3461
3462         adev->gfx.config.max_tile_pipes =
3463                 adev->gfx.config.gb_addr_config_fields.num_pipes;
3464
3465         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
3466                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3467                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
3468         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
3469                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3470                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
3471         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
3472                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3473                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
3474         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
3475                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3476                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
3477
3478         return 0;
3479 }
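
/*
 * Illustrative sketch (not part of the driver): get_gb_addr_config()
 * expands most GB_ADDR_CONFIG fields as powers of two (count = 1 << field),
 * while PIPE_INTERLEAVE_SIZE is biased by 8, so a field value of 0 already
 * means a 256-byte interleave.  The hypothetical helpers below restate
 * those two decodings.
 */
#if 0
static u32 example_gb_addr_pow2_count(u32 field_val)
{
	return 1 << field_val;		/* e.g. NUM_PIPES, NUM_SHADER_ENGINES */
}

static u32 example_gb_addr_pipe_interleave_bytes(u32 field_val)
{
	return 1 << (8 + field_val);	/* 0 -> 256 bytes */
}
#endif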
3480
3481 static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev)
3482 {
3483         uint32_t data;
3484
3485         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
3486         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
3487         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
3488
3489         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
3490         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
3491         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
3492 }
3493
3494 static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
3495 {
3496         if (amdgpu_sriov_vf(adev))
3497                 return;
3498
3499         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3500         case IP_VERSION(12, 0, 0):
3501         case IP_VERSION(12, 0, 1):
3502                 soc15_program_register_sequence(adev,
3503                                                 golden_settings_gc_12_0,
3504                                                 (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
3505
3506                 if (adev->rev_id == 0)
3507                         soc15_program_register_sequence(adev,
3508                                         golden_settings_gc_12_0_rev0,
3509                                         (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0));
3510                 break;
3511         default:
3512                 break;
3513         }
3514 }
3515
3516 static int gfx_v12_0_hw_init(void *handle)
3517 {
3518         int r;
3519         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3520
3521         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
3522                 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
3523                         /* RLC autoload sequence 1: Program rlc ram */
3524                         if (adev->gfx.imu.funcs->program_rlc_ram)
3525                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
3526                 }
3527                 /* rlc autoload firmware */
3528                 r = gfx_v12_0_rlc_backdoor_autoload_enable(adev);
3529                 if (r)
3530                         return r;
3531         } else {
3532                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
3533                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
3534                                 if (adev->gfx.imu.funcs->load_microcode)
3535                                         adev->gfx.imu.funcs->load_microcode(adev);
3536                                 if (adev->gfx.imu.funcs->setup_imu)
3537                                         adev->gfx.imu.funcs->setup_imu(adev);
3538                                 if (adev->gfx.imu.funcs->start_imu)
3539                                         adev->gfx.imu.funcs->start_imu(adev);
3540                         }
3541
3542                         /* disable gpa mode in backdoor loading */
3543                         gfx_v12_0_disable_gpa_mode(adev);
3544                 }
3545         }
3546
3547         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
3548             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
3549                 r = gfx_v12_0_wait_for_rlc_autoload_complete(adev);
3550                 if (r) {
3551                         dev_err(adev->dev, "(%d) failed to wait for rlc autoload to complete\n", r);
3552                         return r;
3553                 }
3554         }
3555
3556         if (!amdgpu_emu_mode)
3557                 gfx_v12_0_init_golden_registers(adev);
3558
3559         adev->gfx.is_poweron = true;
3560
3561         if (get_gb_addr_config(adev))
3562                 DRM_WARN("Invalid gb_addr_config!\n");
3563
3564         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
3565                 gfx_v12_0_config_gfx_rs64(adev);
3566
3567         r = gfx_v12_0_gfxhub_enable(adev);
3568         if (r)
3569                 return r;
3570
3571         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT ||
3572              adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) &&
3573              (amdgpu_dpm == 1)) {
3574                 /*
3575                  * For gfx 12, RLC firmware loading relies on the SMU firmware
3576                  * having been loaded first, so for direct loading we have to
3577                  * load the SMC ucode here before the RLC.
3578                  */
3579                 r = amdgpu_pm_load_smu_firmware(adev, NULL);
3580                 if (r)
3581                         return r;
3582         }
3583
3584         gfx_v12_0_constants_init(adev);
3585
3586         if (adev->nbio.funcs->gc_doorbell_init)
3587                 adev->nbio.funcs->gc_doorbell_init(adev);
3588
3589         r = gfx_v12_0_rlc_resume(adev);
3590         if (r)
3591                 return r;
3592
3593         /*
3594          * Golden register init and RLC resume may override some registers,
3595          * so reconfigure them here.
3596          */
3597         gfx_v12_0_tcp_harvest(adev);
3598
3599         r = gfx_v12_0_cp_resume(adev);
3600         if (r)
3601                 return r;
3602
3603         return r;
3604 }
3605
3606 static int gfx_v12_0_hw_fini(void *handle)
3607 {
3608         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3609         uint32_t tmp;
3610
3611         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3612         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3613         amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
3614
3615         if (!adev->no_hw_access) {
3616                 if (amdgpu_async_gfx_ring) {
3617                         if (amdgpu_gfx_disable_kgq(adev, 0))
3618                                 DRM_ERROR("KGQ disable failed\n");
3619                 }
3620
3621                 if (amdgpu_gfx_disable_kcq(adev, 0))
3622                         DRM_ERROR("KCQ disable failed\n");
3623
3624                 amdgpu_mes_kiq_hw_fini(adev);
3625         }
3626
3627         if (amdgpu_sriov_vf(adev)) {
3628                 gfx_v12_0_cp_gfx_enable(adev, false);
3629                 /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
3630                 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3631                 tmp &= 0xffffff00;
3632                 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3633
3634                 return 0;
3635         }
3636         gfx_v12_0_cp_enable(adev, false);
3637         gfx_v12_0_enable_gui_idle_interrupt(adev, false);
3638
3639         adev->gfxhub.funcs->gart_disable(adev);
3640
3641         adev->gfx.is_poweron = false;
3642
3643         return 0;
3644 }
3645
3646 static int gfx_v12_0_suspend(void *handle)
3647 {
3648         return gfx_v12_0_hw_fini(handle);
3649 }
3650
3651 static int gfx_v12_0_resume(void *handle)
3652 {
3653         return gfx_v12_0_hw_init(handle);
3654 }
3655
3656 static bool gfx_v12_0_is_idle(void *handle)
3657 {
3658         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3659
3660         return !REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
3661                               GRBM_STATUS, GUI_ACTIVE);
3665 }
3666
3667 static int gfx_v12_0_wait_for_idle(void *handle)
3668 {
3669         unsigned i;
3670         u32 tmp;
3671         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3672
3673         for (i = 0; i < adev->usec_timeout; i++) {
3674                 /* read GRBM_STATUS */
3675                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
3676                         GRBM_STATUS__GUI_ACTIVE_MASK;
3677
3678                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3679                         return 0;
3680                 udelay(1);
3681         }
3682         return -ETIMEDOUT;
3683 }
3684
3685 static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3686 {
3687         uint64_t clock = 0;
3688
3689         if (adev->smuio.funcs &&
3690             adev->smuio.funcs->get_gpu_clock_counter)
3691                 clock = adev->smuio.funcs->get_gpu_clock_counter(adev);
3692         else
3693                 dev_warn(adev->dev, "querying the gpu clock counter is not supported\n");
3694
3695         return clock;
3696 }
3697
3698 static int gfx_v12_0_early_init(void *handle)
3699 {
3700         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3701
3702         adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
3703
3704         adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
3705         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
3706                                           AMDGPU_MAX_COMPUTE_RINGS);
3707
3708         gfx_v12_0_set_kiq_pm4_funcs(adev);
3709         gfx_v12_0_set_ring_funcs(adev);
3710         gfx_v12_0_set_irq_funcs(adev);
3711         gfx_v12_0_set_rlc_funcs(adev);
3712         gfx_v12_0_set_mqd_funcs(adev);
3713         gfx_v12_0_set_imu_funcs(adev);
3714
3715         gfx_v12_0_init_rlcg_reg_access_ctrl(adev);
3716
3717         return gfx_v12_0_init_microcode(adev);
3718 }
3719
3720 static int gfx_v12_0_late_init(void *handle)
3721 {
3722         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3723         int r;
3724
3725         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3726         if (r)
3727                 return r;
3728
3729         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3730         if (r)
3731                 return r;
3732
3733         r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
3734         if (r)
3735                 return r;
3736
3737         return 0;
3738 }
3739
3740 static bool gfx_v12_0_is_rlc_enabled(struct amdgpu_device *adev)
3741 {
3742         uint32_t rlc_cntl;
3743
3744         /* report whether the RLC is currently enabled */
3745         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
3746         return REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32) ? true : false;
3747 }
3748
3749 static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev,
3750                                     int xcc_id)
3751 {
3752         uint32_t data;
3753         unsigned i;
3754
3755         data = RLC_SAFE_MODE__CMD_MASK;
3756         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3757
3758         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
3759
3760         /* wait for RLC_SAFE_MODE */
3761         for (i = 0; i < adev->usec_timeout; i++) {
3762                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
3763                                    RLC_SAFE_MODE, CMD))
3764                         break;
3765                 udelay(1);
3766         }
3767 }
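
/*
 * Illustrative sketch (not part of the driver): entering RLC safe mode
 * above writes RLC_SAFE_MODE with the CMD bit plus a MESSAGE value of 1
 * and then polls until the RLC clears CMD as the acknowledgement, while
 * gfx_v12_0_unset_safe_mode() below writes CMD with MESSAGE left at 0.
 * The helper is a hypothetical restatement of the request word.
 */
#if 0
static u32 example_rlc_safe_mode_request(bool enter)
{
	u32 data = RLC_SAFE_MODE__CMD_MASK;

	if (enter)
		data |= 1 << RLC_SAFE_MODE__MESSAGE__SHIFT;

	return data;
}
#endif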
3768
3769 static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev,
3770                                       int xcc_id)
3771 {
3772         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
3773 }
3774
3775 static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
3776                                       bool enable)
3777 {
3778         uint32_t def, data;
3779
3780         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
3781                 return;
3782
3783         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3784
3785         if (enable)
3786                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
3787         else
3788                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
3789
3790         if (def != data)
3791                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3792 }
3793
3794 static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev,
3795                                       struct amdgpu_ring *ring,
3796                                       unsigned vmid)
3797 {
3798         u32 reg, data;
3799
3800         reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
3801         if (amdgpu_sriov_is_pp_one_vf(adev))
3802                 data = RREG32_NO_KIQ(reg);
3803         else
3804                 data = RREG32(reg);
3805
3806         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
3807         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
3808
3809         if (amdgpu_sriov_is_pp_one_vf(adev))
3810                 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
3811         else
3812                 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
3813
3814         if (ring && amdgpu_sriov_is_pp_one_vf(adev) &&
3815             (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
3816              ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) {
3817                 /* reg still holds the RLC_SPM_MC_CNTL offset computed above */
3818                 amdgpu_ring_emit_wreg(ring, reg, data);
3819         }
3821 }
3822
3823 static const struct amdgpu_rlc_funcs gfx_v12_0_rlc_funcs = {
3824         .is_rlc_enabled = gfx_v12_0_is_rlc_enabled,
3825         .set_safe_mode = gfx_v12_0_set_safe_mode,
3826         .unset_safe_mode = gfx_v12_0_unset_safe_mode,
3827         .init = gfx_v12_0_rlc_init,
3828         .get_csb_size = gfx_v12_0_get_csb_size,
3829         .get_csb_buffer = gfx_v12_0_get_csb_buffer,
3830         .resume = gfx_v12_0_rlc_resume,
3831         .stop = gfx_v12_0_rlc_stop,
3832         .reset = gfx_v12_0_rlc_reset,
3833         .start = gfx_v12_0_rlc_start,
3834         .update_spm_vmid = gfx_v12_0_update_spm_vmid,
3835 };
3836
3837 #if 0
3838 static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
3839 {
3840         /* TODO */
3841 }
3842
3843 static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
3844 {
3845         /* TODO */
3846 }
3847 #endif
3848
3849 static int gfx_v12_0_set_powergating_state(void *handle,
3850                                            enum amd_powergating_state state)
3851 {
3852         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3853         bool enable = (state == AMD_PG_STATE_GATE);
3854
3855         if (amdgpu_sriov_vf(adev))
3856                 return 0;
3857
3858         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3859         case IP_VERSION(12, 0, 0):
3860         case IP_VERSION(12, 0, 1):
3861                 amdgpu_gfx_off_ctrl(adev, enable);
3862                 break;
3863         default:
3864                 break;
3865         }
3866
3867         return 0;
3868 }
3869
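/*
 * Enable/disable coarse grain clock gating (CGCG/CGLS and the 3D variants).
 * On enable: clear the CGCG/CGLS overrides, program the CGCG FSM (idle
 * threshold 0x36, repeater compensation delay 0xf) in RLC_CGCG_CGLS_CTRL
 * and RLC_CGCG_CGLS_CTRL_3D, set the CP idle poll count, and turn on the
 * busy/idle and SDMA CGCG interrupts.  On disable: clear the enable bits
 * and the SDMA CGCG interrupt enables.
 */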
3870 static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
3871                                                        bool enable)
3872 {
3873         uint32_t def, data;
3874
3875         if (!(adev->cg_flags &
3876               (AMD_CG_SUPPORT_GFX_CGCG |
3877               AMD_CG_SUPPORT_GFX_CGLS |
3878               AMD_CG_SUPPORT_GFX_3D_CGCG |
3879               AMD_CG_SUPPORT_GFX_3D_CGLS)))
3880                 return;
3881
3882         if (enable) {
3883                 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3884
3885                 /* unset CGCG override */
3886                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3887                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
3888                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3889                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3890                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
3891                     adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3892                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
3893
3894                 /* update CGCG override bits */
3895                 if (def != data)
3896                         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3897
3898                 /* enable cgcg FSM(0x0000363F) */
3899                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
3900
3901                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
3902                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
3903                         data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3904                                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3905                 }
3906
3907                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
3908                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
3909                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3910                                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3911                 }
3912
3913                 if (def != data)
3914                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
3915
3916                 /* Program RLC_CGCG_CGLS_CTRL_3D */
3917                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
3918
3919                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
3920                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
3921                         data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3922                                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
3923                 }
3924
3925                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
3926                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
3927                         data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3928                                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3929                 }
3930
3931                 if (def != data)
3932                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
3933
3934                 /* set IDLE_POLL_COUNT(0x00900100) */
3935                 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
3936
3937                 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
3938                 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3939                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3940
3941                 if (def != data)
3942                         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
3943
3944                 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
3945                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
3946                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
3947                 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
3948                 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
3949                 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
3950
3951                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
3952                 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
3953                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
3954
3955                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
3956                 if (adev->sdma.num_instances > 1) {
3957                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
3958                         data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
3959                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
3960                 }
3961         } else {
3962                 /* Program RLC_CGCG_CGLS_CTRL */
3963                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
3964
3965                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3966                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3967
3968                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3969                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3970
3971                 if (def != data)
3972                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
3973
3974                 /* Program RLC_CGCG_CGLS_CTRL_3D */
3975                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
3976
3977                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
3978                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
3979                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3980                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3981
3982                 if (def != data)
3983                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
3984
3985                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
3986                 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
3987                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
3988
3989                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
3990                 if (adev->sdma.num_instances > 1) {
3991                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
3992                         data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
3993                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
3994                 }
3995         }
3996 }
3997
3998 static void gfx_v12_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3999                                                        bool enable)
4000 {
4001         uint32_t data, def;
4002         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4003                 return;
4004
4005         /* It is disabled by HW by default */
4006         if (enable) {
4007                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4008                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4009                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4010
4011                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4012                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4013                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4014
4015                         if (def != data)
4016                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4017                 }
4018         } else {
4019                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4020                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4021
4022                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4023                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4024                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4025
4026                         if (def != data)
4027                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4028                 }
4029         }
4030 }
4031
4032 static void gfx_v12_0_update_repeater_fgcg(struct amdgpu_device *adev,
4033                                            bool enable)
4034 {
4035         uint32_t def, data;
4036
4037         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4038                 return;
4039
4040         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4041
4042         if (enable)
4043                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
4044                                   RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK);
4045         else
4046                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
4047                                 RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
4048
4049         if (def != data)
4050                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4051 }
4052
4053 static void gfx_v12_0_update_sram_fgcg(struct amdgpu_device *adev,
4054                                        bool enable)
4055 {
4056         uint32_t def, data;
4057
4058         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4059                 return;
4060
4061         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4062
4063         if (enable)
4064                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4065         else
4066                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4067
4068         if (def != data)
4069                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4070 }
4071
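/*
 * Apply all GFX clock-gating features with the RLC held in safe mode, then
 * (un)gate the GUI idle interrupt when any MGCG/CGCG/CGLS feature is present.
 */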
4072 static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4073                                             bool enable)
4074 {
4075         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4076
4077         gfx_v12_0_update_coarse_grain_clock_gating(adev, enable);
4078
4079         gfx_v12_0_update_medium_grain_clock_gating(adev, enable);
4080
4081         gfx_v12_0_update_repeater_fgcg(adev, enable);
4082
4083         gfx_v12_0_update_sram_fgcg(adev, enable);
4084
4085         gfx_v12_0_update_perf_clk(adev, enable);
4086
4087         if (adev->cg_flags &
4088             (AMD_CG_SUPPORT_GFX_MGCG |
4089              AMD_CG_SUPPORT_GFX_CGLS |
4090              AMD_CG_SUPPORT_GFX_CGCG |
4091              AMD_CG_SUPPORT_GFX_3D_CGCG |
4092              AMD_CG_SUPPORT_GFX_3D_CGLS))
4093                 gfx_v12_0_enable_gui_idle_interrupt(adev, enable);
4094
4095         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4096
4097         return 0;
4098 }
4099
4100 static int gfx_v12_0_set_clockgating_state(void *handle,
4101                                            enum amd_clockgating_state state)
4102 {
4103         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4104
4105         if (amdgpu_sriov_vf(adev))
4106                 return 0;
4107
4108         switch (adev->ip_versions[GC_HWIP][0]) {
4109         case IP_VERSION(12, 0, 0):
4110         case IP_VERSION(12, 0, 1):
4111                 gfx_v12_0_update_gfx_clock_gating(adev,
4112                                                   state == AMD_CG_STATE_GATE);
4113                 break;
4114         default:
4115                 break;
4116         }
4117
4118         return 0;
4119 }
4120
4121 static void gfx_v12_0_get_clockgating_state(void *handle, u64 *flags)
4122 {
4123         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4124         int data;
4125
4126         /* AMD_CG_SUPPORT_GFX_MGCG */
4127         data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4128         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4129                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4130
4131         /* AMD_CG_SUPPORT_REPEATER_FGCG */
4132         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
4133                 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
4134
4135         /* AMD_CG_SUPPORT_GFX_FGCG */
4136         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
4137                 *flags |= AMD_CG_SUPPORT_GFX_FGCG;
4138
4139         /* AMD_CG_SUPPORT_GFX_PERF_CLK */
4140         if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
4141                 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
4142
4143         /* AMD_CG_SUPPORT_GFX_CGCG */
4144         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4145         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4146                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4147
4148         /* AMD_CG_SUPPORT_GFX_CGLS */
4149         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4150                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4151
4152         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4153         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4154         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4155                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4156
4157         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4158         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4159                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4160 }
4161
4162 static u64 gfx_v12_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4163 {
4164         /* gfx12 is 32bit rptr */
4165         return *(uint32_t *)ring->rptr_cpu_addr;
4166 }
4167
4168 static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4169 {
4170         struct amdgpu_device *adev = ring->adev;
4171         u64 wptr;
4172
4173         /* XXX check if swapping is necessary on BE */
4174         if (ring->use_doorbell) {
4175                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
4176         } else {
4177                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
4178                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
4179         }
4180
4181         return wptr;
4182 }
4183
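/*
 * Publish a new gfx write pointer.  For MES-managed queues the wptr is
 * mirrored into the shadow slot placed right after the MQD and rung through
 * the doorbell (plus the aggregated doorbell when the queue was unmapped);
 * otherwise it goes out via the ring doorbell or, as a fallback, the
 * CP_RB0_WPTR/CP_RB0_WPTR_HI registers.
 */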
4184 static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4185 {
4186         struct amdgpu_device *adev = ring->adev;
4187         uint32_t *wptr_saved;
4188         uint32_t *is_queue_unmap;
4189         uint64_t aggregated_db_index;
4190         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
4191         uint64_t wptr_tmp;
4192
4193         if (ring->is_mes_queue) {
4194                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
4195                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
4196                                               sizeof(uint32_t));
4197                 aggregated_db_index =
4198                         amdgpu_mes_get_aggregated_doorbell_index(adev,
4199                                                                  ring->hw_prio);
4200
4201                 wptr_tmp = ring->wptr & ring->buf_mask;
4202                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
4203                 *wptr_saved = wptr_tmp;
4204                 /* assume the doorbell is always used by an MES-mapped queue */
4205                 if (*is_queue_unmap) {
4206                         WDOORBELL64(aggregated_db_index, wptr_tmp);
4207                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4208                 } else {
4209                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4210
4211                         if (*is_queue_unmap)
4212                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
4213                 }
4214         } else {
4215                 if (ring->use_doorbell) {
4216                         /* XXX check if swapping is necessary on BE */
4217                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
4218                                      ring->wptr);
4219                         WDOORBELL64(ring->doorbell_index, ring->wptr);
4220                 } else {
4221                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
4222                                      lower_32_bits(ring->wptr));
4223                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
4224                                      upper_32_bits(ring->wptr));
4225                 }
4226         }
4227 }
4228
4229 static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4230 {
4231         /* gfx12 hardware is 32bit rptr */
4232         return *(uint32_t *)ring->rptr_cpu_addr;
4233 }
4234
4235 static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4236 {
4237         u64 wptr;
4238
4239         /* XXX check if swapping is necessary on BE */
4240         if (ring->use_doorbell)
4241                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
4242         else
4243                 BUG();
4244         return wptr;
4245 }
4246
4247 static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4248 {
4249         struct amdgpu_device *adev = ring->adev;
4250         uint32_t *wptr_saved;
4251         uint32_t *is_queue_unmap;
4252         uint64_t aggregated_db_index;
4253         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
4254         uint64_t wptr_tmp;
4255
4256         if (ring->is_mes_queue) {
4257                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
4258                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
4259                                               sizeof(uint32_t));
4260                 aggregated_db_index =
4261                         amdgpu_mes_get_aggregated_doorbell_index(adev,
4262                                                                  ring->hw_prio);
4263
4264                 wptr_tmp = ring->wptr & ring->buf_mask;
4265                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
4266                 *wptr_saved = wptr_tmp;
4267                 /* assume the doorbell is always used by an MES-mapped queue */
4268                 if (*is_queue_unmap) {
4269                         WDOORBELL64(aggregated_db_index, wptr_tmp);
4270                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4271                 } else {
4272                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4273
4274                         if (*is_queue_unmap)
4275                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
4276                 }
4277         } else {
4278                 /* XXX check if swapping is necessary on BE */
4279                 if (ring->use_doorbell) {
4280                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
4281                                      ring->wptr);
4282                         WDOORBELL64(ring->doorbell_index, ring->wptr);
4283                 } else {
4284                         BUG(); /* only DOORBELL method supported on gfx12 now */
4285                 }
4286         }
4287 }
4288
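/*
 * Emit an HDP flush: a WAIT_REG_MEM packet writes this ring's ref_and_mask
 * bit (selected per ME/pipe) to the NBIO HDP flush request register and
 * then polls the flush-done register until the bit is acknowledged.
 */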
4289 static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4290 {
4291         struct amdgpu_device *adev = ring->adev;
4292         u32 ref_and_mask, reg_mem_engine;
4293         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4294
4295         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4296                 switch (ring->me) {
4297                 case 1:
4298                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4299                         break;
4300                 case 2:
4301                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4302                         break;
4303                 default:
4304                         return;
4305                 }
4306                 reg_mem_engine = 0;
4307         } else {
4308                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4309                 reg_mem_engine = 1; /* pfp */
4310         }
4311
4312         gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4313                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4314                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4315                                ref_and_mask, ref_and_mask, 0x20);
4316 }
4317
4318 static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4319                                        struct amdgpu_job *job,
4320                                        struct amdgpu_ib *ib,
4321                                        uint32_t flags)
4322 {
4323         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4324         u32 header, control = 0;
4325
4326         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
4327
4328         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4329
4330         control |= ib->length_dw | (vmid << 24);
4331
4332         if (ring->is_mes_queue)
4333                 /* inherit vmid from mqd */
4334                 control |= 0x400000;
4335
4336         amdgpu_ring_write(ring, header);
4337         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4338         amdgpu_ring_write(ring,
4339 #ifdef __BIG_ENDIAN
4340                 (2 << 0) |
4341 #endif
4342                 lower_32_bits(ib->gpu_addr));
4343         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4344         amdgpu_ring_write(ring, control);
4345 }
4346
4347 static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4348                                            struct amdgpu_job *job,
4349                                            struct amdgpu_ib *ib,
4350                                            uint32_t flags)
4351 {
4352         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4353         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4354
4355         if (ring->is_mes_queue)
4356                 /* inherit vmid from mqd */
4357                 control |= 0x40000000;
4358
4359         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4360         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4361         amdgpu_ring_write(ring,
4362 #ifdef __BIG_ENDIAN
4363                                 (2 << 0) |
4364 #endif
4365                                 lower_32_bits(ib->gpu_addr));
4366         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4367         amdgpu_ring_write(ring, control);
4368 }
4369
4370 static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4371                                      u64 seq, unsigned flags)
4372 {
4373         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4374         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4375
4376         /* RELEASE_MEM - flush caches, send int */
4377         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4378         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
4379                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
4380                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
4381                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4382                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
4383         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
4384                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
4385
4386         /*
4387          * the address should be Qword aligned for a 64-bit write, Dword
4388          * aligned if only the low 32 bits of data are sent (data high is discarded)
4389          */
4390         if (write64bit)
4391                 BUG_ON(addr & 0x7);
4392         else
4393                 BUG_ON(addr & 0x3);
4394         amdgpu_ring_write(ring, lower_32_bits(addr));
4395         amdgpu_ring_write(ring, upper_32_bits(addr));
4396         amdgpu_ring_write(ring, lower_32_bits(seq));
4397         amdgpu_ring_write(ring, upper_32_bits(seq));
4398         amdgpu_ring_write(ring, ring->is_mes_queue ?
4399                          (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
4400 }
4401
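/*
 * Wait until the ring's own fence memory reaches the latest sync_seq so
 * previously submitted work has retired before what follows (typically a
 * VM flush); gfx rings do the wait on the PFP.
 */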
4402 static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4403 {
4404         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4405         uint32_t seq = ring->fence_drv.sync_seq;
4406         uint64_t addr = ring->fence_drv.gpu_addr;
4407
4408         gfx_v12_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
4409                                upper_32_bits(addr), seq, 0xffffffff, 4);
4410 }
4411
4412 static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
4413                                    uint16_t pasid, uint32_t flush_type,
4414                                    bool all_hub, uint8_t dst_sel)
4415 {
4416         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
4417         amdgpu_ring_write(ring,
4418                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
4419                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
4420                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
4421                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
4422 }
4423
4424 static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4425                                          unsigned vmid, uint64_t pd_addr)
4426 {
4427         if (ring->is_mes_queue)
4428                 gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
4429         else
4430                 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4431
4432         /* compute doesn't have PFP */
4433         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4434                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4435                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4436                 amdgpu_ring_write(ring, 0x0);
4437         }
4438 }
4439
4440 static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4441                                           u64 seq, unsigned int flags)
4442 {
4443         struct amdgpu_device *adev = ring->adev;
4444
4445         /* we only allocate 32bit for each seq wb address */
4446         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4447
4448         /* write fence seq to the "addr" */
4449         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4450         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4451                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4452         amdgpu_ring_write(ring, lower_32_bits(addr));
4453         amdgpu_ring_write(ring, upper_32_bits(addr));
4454         amdgpu_ring_write(ring, lower_32_bits(seq));
4455
4456         if (flags & AMDGPU_FENCE_FLAG_INT) {
4457                 /* set register to trigger INT */
4458                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4459                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4460                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4461                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
4462                 amdgpu_ring_write(ring, 0);
4463                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4464         }
4465 }
4466
4467 static void gfx_v12_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
4468                                          uint32_t flags)
4469 {
4470         uint32_t dw2 = 0;
4471
4472         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4473         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4474                 /* set load_global_config & load_global_uconfig */
4475                 dw2 |= 0x8001;
4476                 /* set load_cs_sh_regs */
4477                 dw2 |= 0x01000000;
4478                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4479                 dw2 |= 0x10002;
4480         }
4481
4482         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4483         amdgpu_ring_write(ring, dw2);
4484         amdgpu_ring_write(ring, 0);
4485 }
4486
4487 static unsigned gfx_v12_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
4488                                                    uint64_t addr)
4489 {
4490         unsigned ret;
4491
4492         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4493         amdgpu_ring_write(ring, lower_32_bits(addr));
4494         amdgpu_ring_write(ring, upper_32_bits(addr));
4495         /* discard following DWs if *cond_exec_gpu_addr==0 */
4496         amdgpu_ring_write(ring, 0);
4497         ret = ring->wptr & ring->buf_mask;
4498         /* patch dummy value later */
4499         amdgpu_ring_write(ring, 0);
4500
4501         return ret;
4502 }
4503
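/*
 * Preempt the job currently running on a gfx ring: assert the preemption
 * cond_exec flag, have the KIQ issue PREEMPT_QUEUES_NO_UNMAP with a
 * trailing fence, then poll that fence for up to usec_timeout before
 * deasserting the condition again.  Not used when MES is enabled.
 */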
4504 static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring)
4505 {
4506         int i, r = 0;
4507         struct amdgpu_device *adev = ring->adev;
4508         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4509         struct amdgpu_ring *kiq_ring = &kiq->ring;
4510         unsigned long flags;
4511
4512         if (adev->enable_mes)
4513                 return -EINVAL;
4514
4515         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4516                 return -EINVAL;
4517
4518         spin_lock_irqsave(&kiq->ring_lock, flags);
4519
4520         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
4521                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4522                 return -ENOMEM;
4523         }
4524
4525         /* assert preemption condition */
4526         amdgpu_ring_set_preempt_cond_exec(ring, false);
4527
4528         /* assert IB preemption, emit the trailing fence */
4529         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
4530                                    ring->trail_fence_gpu_addr,
4531                                    ++ring->trail_seq);
4532         amdgpu_ring_commit(kiq_ring);
4533
4534         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4535
4536         /* poll the trailing fence */
4537         for (i = 0; i < adev->usec_timeout; i++) {
4538                 if (ring->trail_seq ==
4539                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
4540                         break;
4541                 udelay(1);
4542         }
4543
4544         if (i >= adev->usec_timeout) {
4545                 r = -EINVAL;
4546                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
4547         }
4548
4549         /* deassert preemption condition */
4550         amdgpu_ring_set_preempt_cond_exec(ring, true);
4551         return r;
4552 }
4553
4554 static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring,
4555                                            bool start,
4556                                            bool secure)
4557 {
4558         uint32_t v = secure ? FRAME_TMZ : 0;
4559
4560         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4561         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
4562 }
4563
4564 static void gfx_v12_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
4565                                      uint32_t reg_val_offs)
4566 {
4567         struct amdgpu_device *adev = ring->adev;
4568
4569         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4570         amdgpu_ring_write(ring, 0 |     /* src: register */
4571                                 (5 << 8) |      /* dst: memory */
4572                                 (1 << 20));     /* write confirm */
4573         amdgpu_ring_write(ring, reg);
4574         amdgpu_ring_write(ring, 0);
4575         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4576                                 reg_val_offs * 4));
4577         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4578                                 reg_val_offs * 4));
4579 }
4580
4581 static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring,
4582                                      uint32_t reg,
4583                                      uint32_t val)
4584 {
4585         uint32_t cmd = 0;
4586
4587         switch (ring->funcs->type) {
4588         case AMDGPU_RING_TYPE_GFX:
4589                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4590                 break;
4591         case AMDGPU_RING_TYPE_KIQ:
4592                 cmd = (1 << 16); /* no inc addr */
4593                 break;
4594         default:
4595                 cmd = WR_CONFIRM;
4596                 break;
4597         }
4598         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4599         amdgpu_ring_write(ring, cmd);
4600         amdgpu_ring_write(ring, reg);
4601         amdgpu_ring_write(ring, 0);
4602         amdgpu_ring_write(ring, val);
4603 }
4604
4605 static void gfx_v12_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4606                                         uint32_t val, uint32_t mask)
4607 {
4608         gfx_v12_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4609 }
4610
4611 static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4612                                                    uint32_t reg0, uint32_t reg1,
4613                                                    uint32_t ref, uint32_t mask)
4614 {
4615         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4616
4617         gfx_v12_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4618                                ref, mask, 0x20);
4619 }
4620
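/*
 * Soft recovery: with the RLC in safe mode, issue an SQ_CMD targeted at
 * the given VMID (CHECK_VMID=1); CMD 0x3 appears to be the wave-kill
 * request, as on earlier gfx generations.
 */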
4621 static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring,
4622                                          unsigned vmid)
4623 {
4624         struct amdgpu_device *adev = ring->adev;
4625         uint32_t value = 0;
4626
4627         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4628         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4629         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4630         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4631         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4632         WREG32_SOC15(GC, 0, regSQ_CMD, value);
4633         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4634 }
4635
4636 static void
4637 gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4638                                       uint32_t me, uint32_t pipe,
4639                                       enum amdgpu_interrupt_state state)
4640 {
4641         uint32_t cp_int_cntl, cp_int_cntl_reg;
4642
4643         if (!me) {
4644                 switch (pipe) {
4645                 case 0:
4646                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
4647                         break;
4648                 default:
4649                         DRM_DEBUG("invalid pipe %d\n", pipe);
4650                         return;
4651                 }
4652         } else {
4653                 DRM_DEBUG("invalid me %d\n", me);
4654                 return;
4655         }
4656
4657         switch (state) {
4658         case AMDGPU_IRQ_STATE_DISABLE:
4659                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4660                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4661                                             TIME_STAMP_INT_ENABLE, 0);
4662                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4663                                             GENERIC0_INT_ENABLE, 0);
4664                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4665                 break;
4666         case AMDGPU_IRQ_STATE_ENABLE:
4667                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4668                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4669                                             TIME_STAMP_INT_ENABLE, 1);
4670                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4671                                             GENERIC0_INT_ENABLE, 1);
4672                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4673                 break;
4674         default:
4675                 break;
4676         }
4677 }
4678
4679 static void gfx_v12_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4680                                                      int me, int pipe,
4681                                                      enum amdgpu_interrupt_state state)
4682 {
4683         u32 mec_int_cntl, mec_int_cntl_reg;
4684
4685         /*
4686          * amdgpu controls only the first MEC. That's why this function only
4687          * handles the setting of interrupts for this specific MEC. All other
4688          * pipes' interrupts are set by amdkfd.
4689          */
4690
4691         if (me == 1) {
4692                 switch (pipe) {
4693                 case 0:
4694                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
4695                         break;
4696                 case 1:
4697                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
4698                         break;
4699                 default:
4700                         DRM_DEBUG("invalid pipe %d\n", pipe);
4701                         return;
4702                 }
4703         } else {
4704                 DRM_DEBUG("invalid me %d\n", me);
4705                 return;
4706         }
4707
4708         switch (state) {
4709         case AMDGPU_IRQ_STATE_DISABLE:
4710                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
4711                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4712                                              TIME_STAMP_INT_ENABLE, 0);
4713                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4714                                              GENERIC0_INT_ENABLE, 0);
4715                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
4716                 break;
4717         case AMDGPU_IRQ_STATE_ENABLE:
4718                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
4719                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4720                                              TIME_STAMP_INT_ENABLE, 1);
4721                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4722                                              GENERIC0_INT_ENABLE, 1);
4723                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
4724                 break;
4725         default:
4726                 break;
4727         }
4728 }
4729
4730 static int gfx_v12_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4731                                             struct amdgpu_irq_src *src,
4732                                             unsigned type,
4733                                             enum amdgpu_interrupt_state state)
4734 {
4735         switch (type) {
4736         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
4737                 gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
4738                 break;
4739         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
4740                 gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
4741                 break;
4742         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4743                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4744                 break;
4745         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4746                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4747                 break;
4748         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4749                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4750                 break;
4751         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4752                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4753                 break;
4754         default:
4755                 break;
4756         }
4757         return 0;
4758 }
4759
4760 static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
4761                              struct amdgpu_irq_src *source,
4762                              struct amdgpu_iv_entry *entry)
4763 {
4764         int i;
4765         u8 me_id, pipe_id, queue_id;
4766         struct amdgpu_ring *ring;
4767         uint32_t mes_queue_id = entry->src_data[0];
4768
4769         DRM_DEBUG("IH: CP EOP\n");
4770
4771         if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
4772                 struct amdgpu_mes_queue *queue;
4773
4774                 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
4775
4776                 spin_lock(&adev->mes.queue_id_lock);
4777                 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
4778                 if (queue) {
4779                         DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
4780                         amdgpu_fence_process(queue->ring);
4781                 }
4782                 spin_unlock(&adev->mes.queue_id_lock);
4783         } else {
4784                 me_id = (entry->ring_id & 0x0c) >> 2;
4785                 pipe_id = (entry->ring_id & 0x03) >> 0;
4786                 queue_id = (entry->ring_id & 0x70) >> 4;
4787
4788                 switch (me_id) {
4789                 case 0:
4790                         if (pipe_id == 0)
4791                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4792                         else
4793                                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
4794                         break;
4795                 case 1:
4796                 case 2:
4797                         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4798                                 ring = &adev->gfx.compute_ring[i];
4799                                 /* Per-queue interrupt is supported for MEC starting from VI.
4800                                  * The interrupt can only be enabled/disabled per pipe instead
4801                                  * of per queue.
4802                                  */
4803                                 if ((ring->me == me_id) &&
4804                                     (ring->pipe == pipe_id) &&
4805                                     (ring->queue == queue_id))
4806                                         amdgpu_fence_process(ring);
4807                         }
4808                         break;
4809                 }
4810         }
4811
4812         return 0;
4813 }
4814
4815 static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4816                                               struct amdgpu_irq_src *source,
4817                                               unsigned int type,
4818                                               enum amdgpu_interrupt_state state)
4819 {
4820         u32 cp_int_cntl_reg, cp_int_cntl;
4821         int i, j;
4822
4823         switch (state) {
4824         case AMDGPU_IRQ_STATE_DISABLE:
4825         case AMDGPU_IRQ_STATE_ENABLE:
4826                 for (i = 0; i < adev->gfx.me.num_me; i++) {
4827                         for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
4828                                 cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
4829
4830                                 if (cp_int_cntl_reg) {
4831                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4832                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4833                                                                     PRIV_REG_INT_ENABLE,
4834                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4835                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4836                                 }
4837                         }
4838                 }
4839                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
4840                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
4841                                 /* MECs start at 1 */
4842                                 cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
4843
4844                                 if (cp_int_cntl_reg) {
4845                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4846                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4847                                                                     PRIV_REG_INT_ENABLE,
4848                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4849                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4850                                 }
4851                         }
4852                 }
4853                 break;
4854         default:
4855                 break;
4856         }
4857
4858         return 0;
4859 }
4860
4861 static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev,
4862                                             struct amdgpu_irq_src *source,
4863                                             unsigned type,
4864                                             enum amdgpu_interrupt_state state)
4865 {
4866         u32 cp_int_cntl_reg, cp_int_cntl;
4867         int i, j;
4868
4869         switch (state) {
4870         case AMDGPU_IRQ_STATE_DISABLE:
4871         case AMDGPU_IRQ_STATE_ENABLE:
4872                 for (i = 0; i < adev->gfx.me.num_me; i++) {
4873                         for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
4874                                 cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
4875
4876                                 if (cp_int_cntl_reg) {
4877                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4878                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4879                                                                     OPCODE_ERROR_INT_ENABLE,
4880                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4881                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4882                                 }
4883                         }
4884                 }
4885                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
4886                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
4887                                 /* MECs start at 1 */
4888                                 cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
4889
4890                                 if (cp_int_cntl_reg) {
4891                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4892                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4893                                                                     OPCODE_ERROR_INT_ENABLE,
4894                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4895                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4896                                 }
4897                         }
4898                 }
4899                 break;
4900         default:
4901                 break;
4902         }
4903         return 0;
4904 }
4905
4906 static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4907                                                struct amdgpu_irq_src *source,
4908                                                unsigned int type,
4909                                                enum amdgpu_interrupt_state state)
4910 {
4911         u32 cp_int_cntl_reg, cp_int_cntl;
4912         int i, j;
4913
4914         switch (state) {
4915         case AMDGPU_IRQ_STATE_DISABLE:
4916         case AMDGPU_IRQ_STATE_ENABLE:
4917                 for (i = 0; i < adev->gfx.me.num_me; i++) {
4918                         for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
4919                                 cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
4920
4921                                 if (cp_int_cntl_reg) {
4922                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4923                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4924                                                                     PRIV_INSTR_INT_ENABLE,
4925                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4926                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4927                                 }
4928                         }
4929                 }
4930                 break;
4931         default:
4932                 break;
4933         }
4934
4935         return 0;
4936 }
4937
4938 static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
4939                                         struct amdgpu_iv_entry *entry)
4940 {
4941         u8 me_id, pipe_id, queue_id;
4942         struct amdgpu_ring *ring;
4943         int i;
4944
4945         me_id = (entry->ring_id & 0x0c) >> 2;
4946         pipe_id = (entry->ring_id & 0x03) >> 0;
4947         queue_id = (entry->ring_id & 0x70) >> 4;
4948
4949         switch (me_id) {
4950         case 0:
4951                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4952                         ring = &adev->gfx.gfx_ring[i];
4953                         if (ring->me == me_id && ring->pipe == pipe_id &&
4954                             ring->queue == queue_id)
4955                                 drm_sched_fault(&ring->sched);
4956                 }
4957                 break;
4958         case 1:
4959         case 2:
4960                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4961                         ring = &adev->gfx.compute_ring[i];
4962                         if (ring->me == me_id && ring->pipe == pipe_id &&
4963                             ring->queue == queue_id)
4964                                 drm_sched_fault(&ring->sched);
4965                 }
4966                 break;
4967         default:
4968                 BUG();
4969                 break;
4970         }
4971 }
4972
4973 static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,
4974                                   struct amdgpu_irq_src *source,
4975                                   struct amdgpu_iv_entry *entry)
4976 {
4977         DRM_ERROR("Illegal register access in command stream\n");
4978         gfx_v12_0_handle_priv_fault(adev, entry);
4979         return 0;
4980 }
4981
4982 static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev,
4983                                 struct amdgpu_irq_src *source,
4984                                 struct amdgpu_iv_entry *entry)
4985 {
4986         DRM_ERROR("Illegal opcode in command stream\n");
4987         gfx_v12_0_handle_priv_fault(adev, entry);
4988         return 0;
4989 }
4990
4991 static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev,
4992                                    struct amdgpu_irq_src *source,
4993                                    struct amdgpu_iv_entry *entry)
4994 {
4995         DRM_ERROR("Illegal instruction in command stream\n");
4996         gfx_v12_0_handle_priv_fault(adev, entry);
4997         return 0;
4998 }
4999
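/*
 * Emit a full cache sync: an ACQUIRE_MEM covering the whole address range
 * that invalidates/writes back GL2 and GLM and invalidates GL1/GLV/GLK/GLI.
 */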
5000 static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
5001 {
5002         const unsigned int gcr_cntl =
5003                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
5004                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
5005                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
5006                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
5007                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
5008                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
5009                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
5010                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
5011
5012         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
5013         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
5014         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
5015         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
5016         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
5017         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
5018         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
5019         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
5020         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
5021 }
5022
5023 static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
5024 {
5025         int i;
5026
5027         /* Header itself is a NOP packet */
5028         if (num_nop == 1) {
5029                 amdgpu_ring_write(ring, ring->funcs->nop);
5030                 return;
5031         }
5032
5033         /* Max HW optimization up to 0x3ffe dwords in one packet, the rest is emitted one NOP at a time */
5034         amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
5035
5036         /* Header is at index 0, followed by num_nop - 1 NOP packets */
5037         for (i = 1; i < num_nop; i++)
5038                 amdgpu_ring_write(ring, ring->funcs->nop);
5039 }
5040
5041 static void gfx_v12_ip_print(void *handle, struct drm_printer *p)
5042 {
5043         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5044         uint32_t i, j, k, reg, index = 0;
5045         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
5046
5047         if (!adev->gfx.ip_dump_core)
5048                 return;
5049
5050         for (i = 0; i < reg_count; i++)
5051                 drm_printf(p, "%-50s \t 0x%08x\n",
5052                            gc_reg_list_12_0[i].reg_name,
5053                            adev->gfx.ip_dump_core[i]);
5054
5055         /* print compute queue registers for all instances */
5056         if (!adev->gfx.ip_dump_compute_queues)
5057                 return;
5058
5059         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
5060         drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
5061                    adev->gfx.mec.num_mec,
5062                    adev->gfx.mec.num_pipe_per_mec,
5063                    adev->gfx.mec.num_queue_per_pipe);
5064
5065         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
5066                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
5067                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
5068                                 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
5069                                 for (reg = 0; reg < reg_count; reg++) {
5070                                         drm_printf(p, "%-50s \t 0x%08x\n",
5071                                                    gc_cp_reg_list_12[reg].reg_name,
5072                                                    adev->gfx.ip_dump_compute_queues[index + reg]);
5073                                 }
5074                                 index += reg_count;
5075                         }
5076                 }
5077         }
5078
5079         /* print gfx queue registers for all instances */
5080         if (!adev->gfx.ip_dump_gfx_queues)
5081                 return;
5082
5083         index = 0;
5084         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
5085         drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
5086                    adev->gfx.me.num_me,
5087                    adev->gfx.me.num_pipe_per_me,
5088                    adev->gfx.me.num_queue_per_pipe);
5089
5090         for (i = 0; i < adev->gfx.me.num_me; i++) {
5091                 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
5092                         for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
5093                                 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
5094                                 for (reg = 0; reg < reg_count; reg++) {
5095                                         drm_printf(p, "%-50s \t 0x%08x\n",
5096                                                    gc_gfx_queue_reg_list_12[reg].reg_name,
5097                                                    adev->gfx.ip_dump_gfx_queues[index + reg]);
5098                                 }
5099                                 index += reg_count;
5100                         }
5101                 }
5102         }
5103 }
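/*
 * The flat dump buffers printed above advance by reg_count entries per queue
 * instance.  A minimal sketch of the sizing this implies, assuming the
 * buffers are allocated elsewhere (e.g. during sw_init) with exactly this
 * many entries; kept under #if 0 so it is never built:
 */
#if 0
static u32 gfx_v12_example_compute_dump_entries(struct amdgpu_device *adev)
{
	/* one gc_cp_reg_list_12 snapshot per compute queue instance */
	return adev->gfx.mec.num_mec *
	       adev->gfx.mec.num_pipe_per_mec *
	       adev->gfx.mec.num_queue_per_pipe *
	       (u32)ARRAY_SIZE(gc_cp_reg_list_12);
}

static u32 gfx_v12_example_gfx_dump_entries(struct amdgpu_device *adev)
{
	/* one gc_gfx_queue_reg_list_12 snapshot per gfx queue instance */
	return adev->gfx.me.num_me *
	       adev->gfx.me.num_pipe_per_me *
	       adev->gfx.me.num_queue_per_pipe *
	       (u32)ARRAY_SIZE(gc_gfx_queue_reg_list_12);
}
#endif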
5104
5105 static void gfx_v12_ip_dump(void *handle)
5106 {
5107         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5108         uint32_t i, j, k, reg, index = 0;
5109         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
5110
5111         if (!adev->gfx.ip_dump_core)
5112                 return;
5113
5114         amdgpu_gfx_off_ctrl(adev, false);
5115         for (i = 0; i < reg_count; i++)
5116                 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i]));
5117         amdgpu_gfx_off_ctrl(adev, true);
5118
5119         /* dump compute queue registers for all instances */
5120         if (!adev->gfx.ip_dump_compute_queues)
5121                 return;
5122
5123         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
5124         amdgpu_gfx_off_ctrl(adev, false);
5125         mutex_lock(&adev->srbm_mutex);
5126         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
5127                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
5128                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
5129                                 /* ME0 is for GFX so start from 1 for CP */
5130                                 soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
5131                                 for (reg = 0; reg < reg_count; reg++) {
5132                                         adev->gfx.ip_dump_compute_queues[index + reg] =
5133                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
5134                                                         gc_cp_reg_list_12[reg]));
5135                                 }
5136                                 index += reg_count;
5137                         }
5138                 }
5139         }
5140         soc24_grbm_select(adev, 0, 0, 0, 0);
5141         mutex_unlock(&adev->srbm_mutex);
5142         amdgpu_gfx_off_ctrl(adev, true);
5143
5144         /* dump gfx queue registers for all instances */
5145         if (!adev->gfx.ip_dump_gfx_queues)
5146                 return;
5147
5148         index = 0;
5149         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
5150         amdgpu_gfx_off_ctrl(adev, false);
5151         mutex_lock(&adev->srbm_mutex);
5152         for (i = 0; i < adev->gfx.me.num_me; i++) {
5153                 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
5154                         for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
5155                                 soc24_grbm_select(adev, i, j, k, 0);
5156
5157                                 for (reg = 0; reg < reg_count; reg++) {
5158                                         adev->gfx.ip_dump_gfx_queues[index + reg] =
5159                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
5160                                                         gc_gfx_queue_reg_list_12[reg]));
5161                                 }
5162                                 index += reg_count;
5163                         }
5164                 }
5165         }
5166         soc24_grbm_select(adev, 0, 0, 0, 0);
5167         mutex_unlock(&adev->srbm_mutex);
5168         amdgpu_gfx_off_ctrl(adev, true);
5169 }
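/*
 * A minimal usage sketch (an assumption, not a path the driver takes as-is):
 * capture the registers with gfx_v12_ip_dump() and route the result to the
 * kernel log through a drm_printer, instead of the core driver code that
 * normally drives the .dump_ip_state/.print_ip_state hooks.  Kept under
 * #if 0 so it is never built.
 */
#if 0
static void gfx_v12_example_dump_to_log(struct amdgpu_device *adev)
{
	struct drm_printer p = drm_info_printer(adev->dev);

	gfx_v12_ip_dump(adev);		/* snapshot the registers */
	gfx_v12_ip_print(adev, &p);	/* pretty-print the snapshot */
}
#endif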
5170
5171 static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
5172 {
5173         struct amdgpu_device *adev = ring->adev;
5174         int r;
5175
5176         if (amdgpu_sriov_vf(adev))
5177                 return -EINVAL;
5178
5179         r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
5180         if (r) {
5181                 dev_err(adev->dev, "reset via MES failed %d\n", r);
5182                 return r;
5183         }
5184
5185         r = amdgpu_bo_reserve(ring->mqd_obj, false);
5186         if (unlikely(r != 0)) {
5187                 dev_err(adev->dev, "failed to reserve mqd_obj\n");
5188                 return r;
5189         }
5190         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
5191         if (!r) {
5192                 r = gfx_v12_0_kgq_init_queue(ring, true);
5193                 amdgpu_bo_kunmap(ring->mqd_obj);
5194                 ring->mqd_ptr = NULL;
5195         }
5196         amdgpu_bo_unreserve(ring->mqd_obj);
5197         if (r) {
5198                 DRM_ERROR("failed to map or init kgq mqd\n");
5199                 return r;
5200         }
5201
5202         r = amdgpu_mes_map_legacy_queue(adev, ring);
5203         if (r) {
5204                 dev_err(adev->dev, "failed to remap kgq\n");
5205                 return r;
5206         }
5207
5208         return amdgpu_ring_test_ring(ring);
5209 }
5210
5211 static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
5212 {
5213         struct amdgpu_device *adev = ring->adev;
5214         int r, i;
5215
5216         if (amdgpu_sriov_vf(adev))
5217                 return -EINVAL;
5218
5219         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5220         mutex_lock(&adev->srbm_mutex);
5221         soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5222         WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
5223         WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
5224         for (i = 0; i < adev->usec_timeout; i++) {
5225                 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
5226                         break;
5227                 udelay(1);
5228         }
5229         soc24_grbm_select(adev, 0, 0, 0, 0);
5230         mutex_unlock(&adev->srbm_mutex);
5231         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5232
5233         r = amdgpu_bo_reserve(ring->mqd_obj, false);
5234         if (unlikely(r != 0)) {
5235                 DRM_ERROR("failed to reserve mqd_obj\n");
5236                 return r;
5237         }
5238         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
5239         if (!r) {
5240                 r = gfx_v12_0_kcq_init_queue(ring, true);
5241                 amdgpu_bo_kunmap(ring->mqd_obj);
5242                 ring->mqd_ptr = NULL;
5243         }
5244         amdgpu_bo_unreserve(ring->mqd_obj);
5245         if (r) {
5246                 DRM_ERROR("failed to map or init kcq mqd\n");
5247                 return r;
5248         }
5249         r = amdgpu_mes_map_legacy_queue(adev, ring);
5250         if (r) {
5251                 dev_err(adev->dev, "failed to remap kcq\n");
5252                 return r;
5253         }
5254
5255         return amdgpu_ring_test_ring(ring);
5256 }
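/*
 * Note on the two reset helpers above (descriptive only): the KGQ path first
 * asks MES to reset the legacy queue, while the KCQ path issues a direct HQD
 * dequeue request and SPI queue reset under the SRBM select; both then
 * re-initialise the MQD, remap the queue through MES and finish with a ring
 * test.  They are wired up as the .reset callbacks of the gfx and compute
 * ring funcs tables below.
 */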
5257
5258 static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
5259         .name = "gfx_v12_0",
5260         .early_init = gfx_v12_0_early_init,
5261         .late_init = gfx_v12_0_late_init,
5262         .sw_init = gfx_v12_0_sw_init,
5263         .sw_fini = gfx_v12_0_sw_fini,
5264         .hw_init = gfx_v12_0_hw_init,
5265         .hw_fini = gfx_v12_0_hw_fini,
5266         .suspend = gfx_v12_0_suspend,
5267         .resume = gfx_v12_0_resume,
5268         .is_idle = gfx_v12_0_is_idle,
5269         .wait_for_idle = gfx_v12_0_wait_for_idle,
5270         .set_clockgating_state = gfx_v12_0_set_clockgating_state,
5271         .set_powergating_state = gfx_v12_0_set_powergating_state,
5272         .get_clockgating_state = gfx_v12_0_get_clockgating_state,
5273         .dump_ip_state = gfx_v12_ip_dump,
5274         .print_ip_state = gfx_v12_ip_print,
5275 };
5276
5277 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
5278         .type = AMDGPU_RING_TYPE_GFX,
5279         .align_mask = 0xff,
5280         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5281         .support_64bit_ptrs = true,
5282         .secure_submission_supported = true,
5283         .get_rptr = gfx_v12_0_ring_get_rptr_gfx,
5284         .get_wptr = gfx_v12_0_ring_get_wptr_gfx,
5285         .set_wptr = gfx_v12_0_ring_set_wptr_gfx,
5286         .emit_frame_size = /* 242 dwords maximum if 16 IBs */
5287                 5 + /* COND_EXEC */
5288                 7 + /* PIPELINE_SYNC */
5289                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5290                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5291                 2 + /* VM_FLUSH */
5292                 8 + /* FENCE for VM_FLUSH */
5293                 5 + /* COND_EXEC */
5294                 7 + /* HDP_flush */
5295                 4 + /* VGT_flush */
5296                 31 + /* DE_META */
5297                 3 + /* CNTX_CTRL */
5298                 5 + /* HDP_INVL */
5299                 8 + 8 + /* FENCE x2 */
5300                 8, /* gfx_v12_0_emit_mem_sync */
5301         .emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */
5302         .emit_ib = gfx_v12_0_ring_emit_ib_gfx,
5303         .emit_fence = gfx_v12_0_ring_emit_fence,
5304         .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
5305         .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
5306         .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
5307         .test_ring = gfx_v12_0_ring_test_ring,
5308         .test_ib = gfx_v12_0_ring_test_ib,
5309         .insert_nop = gfx_v12_ring_insert_nop,
5310         .pad_ib = amdgpu_ring_generic_pad_ib,
5311         .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl,
5312         .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec,
5313         .preempt_ib = gfx_v12_0_ring_preempt_ib,
5314         .emit_frame_cntl = gfx_v12_0_ring_emit_frame_cntl,
5315         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5316         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5317         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5318         .soft_recovery = gfx_v12_0_ring_soft_recovery,
5319         .emit_mem_sync = gfx_v12_0_emit_mem_sync,
5320         .reset = gfx_v12_0_reset_kgq,
5321 };
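/*
 * Sizing note (a hedged aside, based on the generic amdgpu ring accounting):
 * .emit_frame_size above is the worst-case number of dwords the per-frame
 * packets may need, and .emit_ib_size is the per-IB cost; the core submission
 * code is expected to reserve about emit_frame_size + num_ibs * emit_ib_size
 * dwords on the ring before emitting a job.
 */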
5322
5323 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
5324         .type = AMDGPU_RING_TYPE_COMPUTE,
5325         .align_mask = 0xff,
5326         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5327         .support_64bit_ptrs = true,
5328         .get_rptr = gfx_v12_0_ring_get_rptr_compute,
5329         .get_wptr = gfx_v12_0_ring_get_wptr_compute,
5330         .set_wptr = gfx_v12_0_ring_set_wptr_compute,
5331         .emit_frame_size =
5332                 7 + /* gfx_v12_0_ring_emit_hdp_flush */
5333                 5 + /* hdp invalidate */
5334                 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
5335                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5336                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5337                 2 + /* gfx_v12_0_ring_emit_vm_flush */
5338                 8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */
5339                 8, /* gfx_v12_0_emit_mem_sync */
5340         .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
5341         .emit_ib = gfx_v12_0_ring_emit_ib_compute,
5342         .emit_fence = gfx_v12_0_ring_emit_fence,
5343         .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
5344         .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
5345         .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
5346         .test_ring = gfx_v12_0_ring_test_ring,
5347         .test_ib = gfx_v12_0_ring_test_ib,
5348         .insert_nop = gfx_v12_ring_insert_nop,
5349         .pad_ib = amdgpu_ring_generic_pad_ib,
5350         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5351         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5352         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5353         .soft_recovery = gfx_v12_0_ring_soft_recovery,
5354         .emit_mem_sync = gfx_v12_0_emit_mem_sync,
5355         .reset = gfx_v12_0_reset_kcq,
5356 };
5357
5358 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
5359         .type = AMDGPU_RING_TYPE_KIQ,
5360         .align_mask = 0xff,
5361         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5362         .support_64bit_ptrs = true,
5363         .get_rptr = gfx_v12_0_ring_get_rptr_compute,
5364         .get_wptr = gfx_v12_0_ring_get_wptr_compute,
5365         .set_wptr = gfx_v12_0_ring_set_wptr_compute,
5366         .emit_frame_size =
5367                 7 + /* gfx_v12_0_ring_emit_hdp_flush */
5368                 5 + /* hdp invalidate */
5369                 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
5370                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5371                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5372                 2 + /* gfx_v12_0_ring_emit_vm_flush */
5373                 8 + 8 + 8, /* gfx_v12_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5374         .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
5375         .emit_ib = gfx_v12_0_ring_emit_ib_compute,
5376         .emit_fence = gfx_v12_0_ring_emit_fence_kiq,
5377         .test_ring = gfx_v12_0_ring_test_ring,
5378         .test_ib = gfx_v12_0_ring_test_ib,
5379         .insert_nop = amdgpu_ring_insert_nop,
5380         .pad_ib = amdgpu_ring_generic_pad_ib,
5381         .emit_rreg = gfx_v12_0_ring_emit_rreg,
5382         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5383         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5384         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5385 };
5386
5387 static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
5388 {
5389         int i;
5390
5391         adev->gfx.kiq[0].ring.funcs = &gfx_v12_0_ring_funcs_kiq;
5392
5393         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5394                 adev->gfx.gfx_ring[i].funcs = &gfx_v12_0_ring_funcs_gfx;
5395
5396         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5397                 adev->gfx.compute_ring[i].funcs = &gfx_v12_0_ring_funcs_compute;
5398 }
5399
5400 static const struct amdgpu_irq_src_funcs gfx_v12_0_eop_irq_funcs = {
5401         .set = gfx_v12_0_set_eop_interrupt_state,
5402         .process = gfx_v12_0_eop_irq,
5403 };
5404
5405 static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = {
5406         .set = gfx_v12_0_set_priv_reg_fault_state,
5407         .process = gfx_v12_0_priv_reg_irq,
5408 };
5409
5410 static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = {
5411         .set = gfx_v12_0_set_bad_op_fault_state,
5412         .process = gfx_v12_0_bad_op_irq,
5413 };
5414
5415 static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = {
5416         .set = gfx_v12_0_set_priv_inst_fault_state,
5417         .process = gfx_v12_0_priv_inst_irq,
5418 };
5419
5420 static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev)
5421 {
5422         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5423         adev->gfx.eop_irq.funcs = &gfx_v12_0_eop_irq_funcs;
5424
5425         adev->gfx.priv_reg_irq.num_types = 1;
5426         adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs;
5427
5428         adev->gfx.bad_op_irq.num_types = 1;
5429         adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs;
5430
5431         adev->gfx.priv_inst_irq.num_types = 1;
5432         adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs;
5433 }
5434
5435 static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev)
5436 {
5437         if (adev->flags & AMD_IS_APU)
5438                 adev->gfx.imu.mode = MISSION_MODE;
5439         else
5440                 adev->gfx.imu.mode = DEBUG_MODE;
5441
5442         adev->gfx.imu.funcs = &gfx_v12_0_imu_funcs;
5443 }
5444
5445 static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev)
5446 {
5447         adev->gfx.rlc.funcs = &gfx_v12_0_rlc_funcs;
5448 }
5449
5450 static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev)
5451 {
5452         /* set gfx eng mqd */
5453         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
5454                 sizeof(struct v12_gfx_mqd);
5455         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
5456                 gfx_v12_0_gfx_mqd_init;
5457         /* set compute eng mqd */
5458         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
5459                 sizeof(struct v12_compute_mqd);
5460         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
5461                 gfx_v12_0_compute_mqd_init;
5462 }
5463
5464 static void gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
5465                                                           u32 bitmap)
5466 {
5467         u32 data;
5468
5469         if (!bitmap)
5470                 return;
5471
5472         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
5473         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
5474
5475         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
5476 }
5477
5478 static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
5479 {
5480         u32 data, wgp_bitmask;
5481         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
5482         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
5483
5484         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
5485         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
5486
5487         wgp_bitmask =
5488                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
5489
5490         return (~data) & wgp_bitmask;
5491 }
5492
5493 static u32 gfx_v12_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
5494 {
5495         u32 wgp_idx, wgp_active_bitmap;
5496         u32 cu_bitmap_per_wgp, cu_active_bitmap;
5497
5498         wgp_active_bitmap = gfx_v12_0_get_wgp_active_bitmap_per_sh(adev);
5499         cu_active_bitmap = 0;
5500
5501         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
5502                 /* if there is one WGP enabled, it means 2 CUs will be enabled */
5503                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
5504                 if (wgp_active_bitmap & (1 << wgp_idx))
5505                         cu_active_bitmap |= cu_bitmap_per_wgp;
5506         }
5507
5508         return cu_active_bitmap;
5509 }
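/*
 * Worked example for the WGP to CU conversion above (illustrative only):
 * with wgp_active_bitmap = 0b0101 (WGPs 0 and 2 active) the loop sets CU
 * bits 0-1 and 4-5, so cu_active_bitmap = 0b00110011.  The WGP mask width
 * itself comes from gfx_v12_0_get_wgp_active_bitmap_per_sh(), which builds
 * a bitmask of max_cu_per_sh / 2 bits, matching the two-CUs-per-WGP packing.
 */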
5510
5511 static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
5512                                  struct amdgpu_cu_info *cu_info)
5513 {
5514         int i, j, k, counter, active_cu_number = 0;
5515         u32 mask, bitmap;
5516         unsigned disable_masks[8 * 2];
5517
5518         if (!adev || !cu_info)
5519                 return -EINVAL;
5520
5521         amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
5522
5523         mutex_lock(&adev->grbm_idx_mutex);
5524         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5525                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5526                         bitmap = i * adev->gfx.config.max_sh_per_se + j;
5527                         if (!((gfx_v12_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
5528                                 continue;
5529                         mask = 1;
5530                         counter = 0;
5531                         gfx_v12_0_select_se_sh(adev, i, j, 0xffffffff, 0);
5532                         if (i < 8 && j < 2)
5533                                 gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(
5534                                         adev, disable_masks[i * 2 + j]);
5535                         bitmap = gfx_v12_0_get_cu_active_bitmap_per_sh(adev);
5536
5537                         /*
5538                          * GFX12 could support more than 4 SEs, while the bitmap
5539                          * in the cu_info struct is 4x4 and the ioctl interface struct
5540                          * drm_amdgpu_info_device must stay stable.
5541                          * So we use the last two columns of the bitmap to store the
5542                          * CU mask for SEs 4 to 7; the layout of the bitmap is as below:
5543                          *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
5544                          *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
5545                          *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
5546                          *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
5547                          *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
5548                          *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
5549                          *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
5550                          *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
5551                          */
5552                         cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
5553
5554                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5555                                 if (bitmap & mask)
5556                                         counter++;
5557
5558                                 mask <<= 1;
5559                         }
5560                         active_cu_number += counter;
5561                 }
5562         }
5563         gfx_v12_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5564         mutex_unlock(&adev->grbm_idx_mutex);
5565
5566         cu_info->number = active_cu_number;
5567         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5568
5569         return 0;
5570 }
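/*
 * Index example for the bitmap layout documented in the comment above
 * (illustrative only): for SE5/SH1, i = 5 and j = 1, so
 * cu_info->bitmap[0][i % 4][j + (i / 4) * 2] resolves to bitmap[0][1][3],
 * which matches the "SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}" row.
 */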
5571
5572 const struct amdgpu_ip_block_version gfx_v12_0_ip_block = {
5573         .type = AMD_IP_BLOCK_TYPE_GFX,
5574         .major = 12,
5575         .minor = 0,
5576         .rev = 0,
5577         .funcs = &gfx_v12_0_ip_funcs,
5578 };