2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
31 #include "amdgpu_gfx.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
40 #include "vega10_enum.h"
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 #include "amdgpu_ras.h"
50 #include "amdgpu_ring_mux.h"
53 #include "gfx_v9_4_2.h"
55 #include "asic_reg/pwr/pwr_10_0_offset.h"
56 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
57 #include "asic_reg/gc/gc_9_0_default.h"
59 #define GFX9_NUM_GFX_RINGS 1
60 #define GFX9_NUM_SW_GFX_RINGS 2
61 #define GFX9_MEC_HPD_SIZE 4096
62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65 #define mmGCEA_PROBE_MAP 0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX 0
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03
135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
136 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04
137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
138 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09
139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
140 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
142 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
144 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
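/* GC/CP status registers collected when dumping gfx v9 IP state for debugging (e.g. after a GPU hang) */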
152 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
153 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
154 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
155 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
156 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
157 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
158 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
159 SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
160 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
161 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
162 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
163 SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
164 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
165 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
166 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
167 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
168 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
169 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
170 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
171 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
172 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
173 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
174 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
175 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
176 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
177 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
178 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
179 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
180 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
181 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
182 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
183 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
184 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
185 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
186 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
187 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
188 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
189 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
190 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
191 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
192 SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
193 SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
194 SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
195 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
196 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
197 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
198 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
199 SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
200 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
201 SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
202 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
203 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
204 SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
205 SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
206 SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
207 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
208 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
209 SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
210 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
211 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
212 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
213 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
214 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
215 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
216 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
217 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
218 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
219 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
220 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
221 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
222 SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
223 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
224 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
225 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
226 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
227 /* cp header registers */
228 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
229 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
230 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
231 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
232 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
233 /* SE status registers */
234 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
235 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
236 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
237 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
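/* Per-queue compute HQD registers collected in the gfx v9 IP state dump */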
240 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
241 /* compute queue registers */
242 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
243 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
244 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
245 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
246 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
247 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
248 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
249 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
250 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
251 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
252 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
253 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
254 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
255 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
256 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
257 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
258 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
259 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
260 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
261 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
262 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
263 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
264 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
265 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
266 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
267 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
268 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
269 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
270 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
271 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
272 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
273 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
274 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
275 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
276 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
277 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
278 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
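/* GFX RAS sub-block IDs as used by the RAS TA; the *_INDEX_START/_INDEX_END entries bracket groups of related sub-blocks */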
281 enum ta_ras_gfx_subblock {
283 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
284 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
285 TA_RAS_BLOCK__GFX_CPC_UCODE,
286 TA_RAS_BLOCK__GFX_DC_STATE_ME1,
287 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
288 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
289 TA_RAS_BLOCK__GFX_DC_STATE_ME2,
290 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
291 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
292 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294 TA_RAS_BLOCK__GFX_CPF_INDEX_START,
295 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
297 TA_RAS_BLOCK__GFX_CPF_TAG,
298 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300 TA_RAS_BLOCK__GFX_CPG_INDEX_START,
301 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
303 TA_RAS_BLOCK__GFX_CPG_TAG,
304 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306 TA_RAS_BLOCK__GFX_GDS_INDEX_START,
307 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
309 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
310 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
311 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
312 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314 TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316 TA_RAS_BLOCK__GFX_SQ_INDEX_START,
317 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 TA_RAS_BLOCK__GFX_SQ_LDS_D,
319 TA_RAS_BLOCK__GFX_SQ_LDS_I,
320 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
321 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323 TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
326 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
327 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
328 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
329 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
330 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
331 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
332 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
333 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
334 TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
335 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
338 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
339 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
340 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
341 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
342 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
343 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
344 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
345 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
346 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
347 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
348 TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
349 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
352 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
353 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
354 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
355 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
356 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
357 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
358 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
359 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
360 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
361 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
362 TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
363 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
364 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366 TA_RAS_BLOCK__GFX_TA_INDEX_START,
367 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
369 TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
370 TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
371 TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
372 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374 TA_RAS_BLOCK__GFX_TCA_INDEX_START,
375 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
377 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 /* TCC (5 sub-ranges)*/
379 TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
382 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
383 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
384 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
385 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
386 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
387 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
388 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
389 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
390 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392 TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
393 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
395 TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
396 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398 TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
399 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
401 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
402 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
403 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
404 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
405 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
406 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
407 TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
408 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410 TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
411 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
413 TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
414 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
417 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
418 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
419 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
420 TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
421 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
422 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426 TA_RAS_BLOCK__GFX_TCP_INDEX_START,
427 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
429 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
430 TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
431 TA_RAS_BLOCK__GFX_TCP_DB_RAM,
432 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
433 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
434 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436 TA_RAS_BLOCK__GFX_TD_INDEX_START,
437 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
439 TA_RAS_BLOCK__GFX_TD_CS_FIFO,
440 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 /* EA (3 sub-ranges)*/
442 TA_RAS_BLOCK__GFX_EA_INDEX_START,
444 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
445 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
446 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
447 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
448 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
449 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
450 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
451 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
452 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
453 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455 TA_RAS_BLOCK__GFX_EA_INDEX1_START,
456 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
458 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
459 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
460 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
461 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
462 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
463 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465 TA_RAS_BLOCK__GFX_EA_INDEX2_START,
466 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
468 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
469 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
470 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475 TA_RAS_BLOCK__UTC_VML2_WALKER,
476 /* UTC ATC L2 2MB cache*/
477 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
478 /* UTC ATC L2 4KB cache*/
479 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
480 TA_RAS_BLOCK__GFX_MAX
483 struct ras_gfx_subblock {
486 int hw_supported_error_type;
487 int sw_supported_error_type;
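/* Pack the a..d (hw) and e..h (sw) supported-error-type flags into the two masks of struct ras_gfx_subblock */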
490 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
491 [AMDGPU_RAS_BLOCK__##subblock] = { \
493 TA_RAS_BLOCK__##subblock, \
494 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
495 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
498 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
499 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
500 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
501 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
502 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
503 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
505 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
509 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
510 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
511 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
512 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
513 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
514 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
515 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
520 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
521 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
522 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
523 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
524 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
525 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
526 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
575 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
576 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
580 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
581 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
582 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
593 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
595 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
596 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
597 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
598 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
599 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
600 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
601 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
602 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
603 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
606 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
613 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
614 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
615 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
616 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
617 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
618 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
619 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
620 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
621 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
622 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
623 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
624 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
625 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
632 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
643 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
644 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
645 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
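/* "Golden" register settings: each entry names a register, the field mask to update and the value to program; applied during init via soc15_program_register_sequence() */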
648 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
672 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
694 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
697 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
706 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
709 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
712 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
713 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
714 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
715 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
716 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
717 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
718 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
719 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
720 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
721 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
722 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
723 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
724 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
725 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
726 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
727 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
728 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
729 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
730 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
731 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
732 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
733 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
734 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
737 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
740 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
741 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
742 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
743 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
744 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
745 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
748 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
751 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
752 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
753 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
754 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
755 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
756 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
757 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
758 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
759 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
760 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
761 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
762 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
763 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
764 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
766 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
767 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
768 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
771 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
774 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
775 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
776 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
777 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
778 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
779 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
780 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
781 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
782 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
783 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
784 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
787 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
790 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
791 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
794 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
797 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
798 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
799 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
800 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
801 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
802 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
803 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
804 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
805 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
806 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
807 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
808 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
809 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
810 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
811 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
814 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
817 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
818 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
819 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
820 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
821 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
822 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
823 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
824 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
825 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
826 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
827 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
828 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
831 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
834 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
835 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
836 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
837 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
838 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
839 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
840 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
841 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
842 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
843 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
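/* Registers whose programming is routed through the RLCG interface (e.g. when direct MMIO access is restricted under SR-IOV) */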
846 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
847 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
848 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
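/* Offsets of the RLC_SRM_INDEX_CNTL_ADDR/_DATA register pairs, relative to the first instance */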
851 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
854 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
863 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
866 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
875 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
876 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
877 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
878 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
881 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
884 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
885 struct amdgpu_cu_info *cu_info);
886 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
887 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
888 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
889 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
890 void *ras_error_status);
891 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
892 void *inject_if, uint32_t instance_mask);
893 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
894 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
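/* Emit a SET_RESOURCES packet on the KIQ ring describing the compute queue mask the kernel driver owns */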
897 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
900 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
901 amdgpu_ring_write(kiq_ring,
902 PACKET3_SET_RESOURCES_VMID_MASK(0) |
903 /* vmid_mask:0 queue_type:0 (KIQ) */
904 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
905 amdgpu_ring_write(kiq_ring,
906 lower_32_bits(queue_mask)); /* queue mask lo */
907 amdgpu_ring_write(kiq_ring,
908 upper_32_bits(queue_mask)); /* queue mask hi */
909 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
910 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
911 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
912 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
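/* Emit a MAP_QUEUES packet asking the KIQ to map @ring using its MQD, doorbell and wptr addresses so the CP starts scheduling it */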
915 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
916 struct amdgpu_ring *ring)
918 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
919 uint64_t wptr_addr = ring->wptr_gpu_addr;
920 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
922 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
923 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
924 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
925 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
926 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
927 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
928 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
929 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
930 /*queue_type: normal compute queue */
931 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
932 /* alloc format: all_on_one_pipe */
933 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
934 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
935 /* num_queues: must be 1 */
936 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
937 amdgpu_ring_write(kiq_ring,
938 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
939 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
940 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
941 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
942 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
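/* Emit an UNMAP_QUEUES packet; for PREEMPT_QUEUES_NO_UNMAP the current wptr is passed back so the queue can be restarted later */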
945 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
946 struct amdgpu_ring *ring,
947 enum amdgpu_unmap_queues_action action,
948 u64 gpu_addr, u64 seq)
950 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
952 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
953 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
954 PACKET3_UNMAP_QUEUES_ACTION(action) |
955 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
956 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
957 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
958 amdgpu_ring_write(kiq_ring,
959 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
961 if (action == PREEMPT_QUEUES_NO_UNMAP) {
962 amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
963 amdgpu_ring_write(kiq_ring, 0);
964 amdgpu_ring_write(kiq_ring, 0);
967 amdgpu_ring_write(kiq_ring, 0);
968 amdgpu_ring_write(kiq_ring, 0);
969 amdgpu_ring_write(kiq_ring, 0);
973 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
974 struct amdgpu_ring *ring,
978 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
980 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
981 amdgpu_ring_write(kiq_ring,
982 PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
983 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
984 PACKET3_QUERY_STATUS_COMMAND(2));
985 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
986 amdgpu_ring_write(kiq_ring,
987 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
988 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
989 amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
990 amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
991 amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
992 amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
995 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
996 uint16_t pasid, uint32_t flush_type,
999 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1000 amdgpu_ring_write(kiq_ring,
1001 PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1002 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1003 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1004 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
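/* Dword sizes of the PM4 packets emitted by the KIQ helpers above */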
1007 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1008 .kiq_set_resources = gfx_v9_0_kiq_set_resources,
1009 .kiq_map_queues = gfx_v9_0_kiq_map_queues,
1010 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1011 .kiq_query_status = gfx_v9_0_kiq_query_status,
1012 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1013 .set_resources_size = 8,
1014 .map_queues_size = 7,
1015 .unmap_queues_size = 6,
1016 .query_status_size = 7,
1017 .invalidate_tlbs_size = 2,
1020 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1022 adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
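/* Program the per-ASIC golden register settings according to the GC IP version */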
1025 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1027 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1028 case IP_VERSION(9, 0, 1):
1029 soc15_program_register_sequence(adev,
1030 golden_settings_gc_9_0,
1031 ARRAY_SIZE(golden_settings_gc_9_0));
1032 soc15_program_register_sequence(adev,
1033 golden_settings_gc_9_0_vg10,
1034 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1036 case IP_VERSION(9, 2, 1):
1037 soc15_program_register_sequence(adev,
1038 golden_settings_gc_9_2_1,
1039 ARRAY_SIZE(golden_settings_gc_9_2_1));
1040 soc15_program_register_sequence(adev,
1041 golden_settings_gc_9_2_1_vg12,
1042 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1044 case IP_VERSION(9, 4, 0):
1045 soc15_program_register_sequence(adev,
1046 golden_settings_gc_9_0,
1047 ARRAY_SIZE(golden_settings_gc_9_0));
1048 soc15_program_register_sequence(adev,
1049 golden_settings_gc_9_0_vg20,
1050 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1052 case IP_VERSION(9, 4, 1):
1053 soc15_program_register_sequence(adev,
1054 golden_settings_gc_9_4_1_arct,
1055 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1057 case IP_VERSION(9, 2, 2):
1058 case IP_VERSION(9, 1, 0):
1059 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1060 ARRAY_SIZE(golden_settings_gc_9_1));
1061 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1062 soc15_program_register_sequence(adev,
1063 golden_settings_gc_9_1_rv2,
1064 ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1066 soc15_program_register_sequence(adev,
1067 golden_settings_gc_9_1_rv1,
1068 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1070 case IP_VERSION(9, 3, 0):
1071 soc15_program_register_sequence(adev,
1072 golden_settings_gc_9_1_rn,
1073 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1074 return; /* Renoir does not need the common golden settings */
1075 case IP_VERSION(9, 4, 2):
1076 gfx_v9_4_2_init_golden_registers(adev,
1077 adev->smuio.funcs->get_die_id(adev));
1083 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1084 (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1085 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1086 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
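/* Emit a WRITE_DATA packet that writes @val to the register at offset @reg; @wc requests write confirmation */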
1089 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1090 bool wc, uint32_t reg, uint32_t val)
1092 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1093 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1094 WRITE_DATA_DST_SEL(0) |
1095 (wc ? WR_CONFIRM : 0));
1096 amdgpu_ring_write(ring, reg);
1097 amdgpu_ring_write(ring, 0);
1098 amdgpu_ring_write(ring, val);
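/* Emit a WAIT_REG_MEM packet that polls a register (mem_space=0) or memory location (mem_space=1) until (value & mask) == ref */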
1101 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1102 int mem_space, int opt, uint32_t addr0,
1103 uint32_t addr1, uint32_t ref, uint32_t mask,
1106 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1107 amdgpu_ring_write(ring,
1108 /* memory (1) or register (0) */
1109 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1110 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1111 WAIT_REG_MEM_FUNCTION(3) | /* equal */
1112 WAIT_REG_MEM_ENGINE(eng_sel)));
1115 BUG_ON(addr0 & 0x3); /* Dword align */
1116 amdgpu_ring_write(ring, addr0);
1117 amdgpu_ring_write(ring, addr1);
1118 amdgpu_ring_write(ring, ref);
1119 amdgpu_ring_write(ring, mask);
1120 amdgpu_ring_write(ring, inv); /* poll interval */
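/* Basic ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring and poll until the value shows up */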
1123 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1125 struct amdgpu_device *adev = ring->adev;
1126 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1131 WREG32(scratch, 0xCAFEDEAD);
1132 r = amdgpu_ring_alloc(ring, 3);
1136 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1137 amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1138 amdgpu_ring_write(ring, 0xDEADBEEF);
1139 amdgpu_ring_commit(ring);
1141 for (i = 0; i < adev->usec_timeout; i++) {
1142 tmp = RREG32(scratch);
1143 if (tmp == 0xDEADBEEF)
1148 if (i >= adev->usec_timeout)
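/* IB test: submit a small IB that writes 0xDEADBEEF to a writeback slot, then wait for its fence and check the value */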
1153 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1155 struct amdgpu_device *adev = ring->adev;
1156 struct amdgpu_ib ib;
1157 struct dma_fence *f = NULL;
1164 r = amdgpu_device_wb_get(adev, &index);
1168 gpu_addr = adev->wb.gpu_addr + (index * 4);
1169 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1170 memset(&ib, 0, sizeof(ib));
1172 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1176 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1177 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1178 ib.ptr[2] = lower_32_bits(gpu_addr);
1179 ib.ptr[3] = upper_32_bits(gpu_addr);
1180 ib.ptr[4] = 0xDEADBEEF;
1183 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1187 r = dma_fence_wait_timeout(f, false, timeout);
1195 tmp = adev->wb.wb[index];
1196 if (tmp == 0xDEADBEEF)
1202 amdgpu_ib_free(adev, &ib, NULL);
1205 amdgpu_device_wb_free(adev, index);
1210 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1212 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1213 amdgpu_ucode_release(&adev->gfx.me_fw);
1214 amdgpu_ucode_release(&adev->gfx.ce_fw);
1215 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1216 amdgpu_ucode_release(&adev->gfx.mec_fw);
1217 amdgpu_ucode_release(&adev->gfx.mec2_fw);
1219 kfree(adev->gfx.rlc.register_list_format);
1222 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1224 adev->gfx.me_fw_write_wait = false;
1225 adev->gfx.mec_fw_write_wait = false;
1227 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1228 ((adev->gfx.mec_fw_version < 0x000001a5) ||
1229 (adev->gfx.mec_feature_version < 46) ||
1230 (adev->gfx.pfp_fw_version < 0x000000b7) ||
1231 (adev->gfx.pfp_feature_version < 46)))
1232 DRM_WARN_ONCE("CP firmware version too old, please update!");
1234 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1235 case IP_VERSION(9, 0, 1):
1236 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1237 (adev->gfx.me_feature_version >= 42) &&
1238 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1239 (adev->gfx.pfp_feature_version >= 42))
1240 adev->gfx.me_fw_write_wait = true;
1242 if ((adev->gfx.mec_fw_version >= 0x00000193) &&
1243 (adev->gfx.mec_feature_version >= 42))
1244 adev->gfx.mec_fw_write_wait = true;
1246 case IP_VERSION(9, 2, 1):
1247 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1248 (adev->gfx.me_feature_version >= 44) &&
1249 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1250 (adev->gfx.pfp_feature_version >= 44))
1251 adev->gfx.me_fw_write_wait = true;
1253 if ((adev->gfx.mec_fw_version >= 0x00000196) &&
1254 (adev->gfx.mec_feature_version >= 44))
1255 adev->gfx.mec_fw_write_wait = true;
1257 case IP_VERSION(9, 4, 0):
1258 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1259 (adev->gfx.me_feature_version >= 44) &&
1260 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1261 (adev->gfx.pfp_feature_version >= 44))
1262 adev->gfx.me_fw_write_wait = true;
1264 if ((adev->gfx.mec_fw_version >= 0x00000197) &&
1265 (adev->gfx.mec_feature_version >= 44))
1266 adev->gfx.mec_fw_write_wait = true;
1268 case IP_VERSION(9, 1, 0):
1269 case IP_VERSION(9, 2, 2):
1270 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1271 (adev->gfx.me_feature_version >= 42) &&
1272 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1273 (adev->gfx.pfp_feature_version >= 42))
1274 adev->gfx.me_fw_write_wait = true;
1276 if ((adev->gfx.mec_fw_version >= 0x00000192) &&
1277 (adev->gfx.mec_feature_version >= 42))
1278 adev->gfx.mec_fw_write_wait = true;
1281 adev->gfx.me_fw_write_wait = true;
1282 adev->gfx.mec_fw_write_wait = true;
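/* Quirk table: devices (vendor/device/subsystem IDs and revision) on which GFXOFF is disabled because it is known to be unstable */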
1287 struct amdgpu_gfxoff_quirk {
1295 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1296 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1297 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1298 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1299 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1300 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1301 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1302 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1303 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1307 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1309 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1311 while (p && p->chip_device != 0) {
1312 if (pdev->vendor == p->chip_vendor &&
1313 pdev->device == p->chip_device &&
1314 pdev->subsystem_vendor == p->subsys_vendor &&
1315 pdev->subsystem_device == p->subsys_device &&
1316 pdev->revision == p->revision) {
1324 static bool is_raven_kicker(struct amdgpu_device *adev)
1326 if (adev->pm.fw_version >= 0x41e2b)
1332 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1334 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1335 (adev->gfx.me_fw_version >= 0x000000a5) &&
1336 (adev->gfx.me_feature_version >= 52))
1342 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1344 if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1345 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1347 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1348 case IP_VERSION(9, 0, 1):
1349 case IP_VERSION(9, 2, 1):
1350 case IP_VERSION(9, 4, 0):
1352 case IP_VERSION(9, 2, 2):
1353 case IP_VERSION(9, 1, 0):
1354 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1355 (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1356 ((!is_raven_kicker(adev) &&
1357 adev->gfx.rlc_fw_version < 531) ||
1358 (adev->gfx.rlc_feature_version < 1) ||
1359 !adev->gfx.rlc.is_rlc_v2_1))
1360 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1362 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1363 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1365 AMD_PG_SUPPORT_RLC_SMU_HS;
1367 case IP_VERSION(9, 3, 0):
1368 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1369 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1371 AMD_PG_SUPPORT_RLC_SMU_HS;
1378 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1383 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1384 "amdgpu/%s_pfp.bin", chip_name);
1387 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1389 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1390 "amdgpu/%s_me.bin", chip_name);
1393 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1395 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1396 "amdgpu/%s_ce.bin", chip_name);
1399 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1403 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1404 amdgpu_ucode_release(&adev->gfx.me_fw);
1405 amdgpu_ucode_release(&adev->gfx.ce_fw);
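/*
 * Request and parse the RLC firmware. Picasso AM4 parts and "kicker"
 * Raven boards (detected via the SMU firmware version) need dedicated
 * RLC images, so the file name is picked before the common header
 * parsing in amdgpu_gfx_rlc_init_microcode().
 */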
1410 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1414 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1415 uint16_t version_major;
1416 uint16_t version_minor;
1417 uint32_t smu_version;
1420 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1421 * instead of picasso_rlc.bin.
1423 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1424 * or revision >= 0xD8 && revision <= 0xDF
1425 * otherwise it is PCO FP5
1427 if (!strcmp(chip_name, "picasso") &&
1428 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1429 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1430 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1431 "amdgpu/%s_rlc_am4.bin", chip_name);
1432 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1433 (smu_version >= 0x41e2b))
1435 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1437 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1438 "amdgpu/%s_kicker_rlc.bin", chip_name);
1440 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1441 "amdgpu/%s_rlc.bin", chip_name);
1445 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1446 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1447 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1448 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1451 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1456 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1458 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1459 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1460 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1466 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1471 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1472 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1473 "amdgpu/%s_sjt_mec.bin", chip_name);
1475 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1476 "amdgpu/%s_mec.bin", chip_name);
1480 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1481 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1483 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1484 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1485 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1486 "amdgpu/%s_sjt_mec2.bin", chip_name);
1488 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1489 "amdgpu/%s_mec2.bin", chip_name);
1491 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1492 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1495 amdgpu_ucode_release(&adev->gfx.mec2_fw);
1498 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1499 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1502 gfx_v9_0_check_if_need_gfxoff(adev);
1503 gfx_v9_0_check_fw_write_wait(adev);
1507 amdgpu_ucode_release(&adev->gfx.mec_fw);
1511 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1513 char ucode_prefix[30];
1517 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1519 /* No CPG in Arcturus */
1520 if (adev->gfx.num_gfx_rings) {
1521 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1526 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1530 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1537 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1540 const struct cs_section_def *sect = NULL;
1541 const struct cs_extent_def *ext = NULL;
1543 /* begin clear state */
1545 /* context control state */
1548 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1549 for (ext = sect->section; ext->extent != NULL; ++ext) {
1550 if (sect->id == SECT_CONTEXT)
1551 count += 2 + ext->reg_count;
1557 /* end clear state */
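/*
 * Emit the clear-state buffer contents. The packet sequence mirrors
 * what gfx_v9_0_get_csb_size() accounts for: preamble begin, context
 * control, one SET_CONTEXT_REG run per SECT_CONTEXT extent, preamble
 * end, and a final CLEAR_STATE packet.
 */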
1565 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1566 volatile u32 *buffer)
1569 const struct cs_section_def *sect = NULL;
1570 const struct cs_extent_def *ext = NULL;
1572 if (adev->gfx.rlc.cs_data == NULL)
1577 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1578 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1580 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1581 buffer[count++] = cpu_to_le32(0x80000000);
1582 buffer[count++] = cpu_to_le32(0x80000000);
1584 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1585 for (ext = sect->section; ext->extent != NULL; ++ext) {
1586 if (sect->id == SECT_CONTEXT) {
1588 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1589 buffer[count++] = cpu_to_le32(ext->reg_index -
1590 PACKET3_SET_CONTEXT_REG_START);
1591 for (i = 0; i < ext->reg_count; i++)
1592 buffer[count++] = cpu_to_le32(ext->extent[i]);
1599 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1600 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1602 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1603 buffer[count++] = cpu_to_le32(0);
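/*
 * Program the always-on CU masks. The first two CUs per SH feed
 * RLC_PG_ALWAYS_ON_CU_MASK, while always_on_cu_num CUs (4 on APUs,
 * 8 on GC 9.2.1, 12 otherwise) build RLC_LB_ALWAYS_ACTIVE_CU_MASK and
 * the cached ao_cu_bitmap for each SE/SH.
 */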
1606 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1608 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1609 uint32_t pg_always_on_cu_num = 2;
1610 uint32_t always_on_cu_num;
1612 uint32_t mask, cu_bitmap, counter;
1614 if (adev->flags & AMD_IS_APU)
1615 always_on_cu_num = 4;
1616 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1617 always_on_cu_num = 8;
1619 always_on_cu_num = 12;
1621 mutex_lock(&adev->grbm_idx_mutex);
1622 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1623 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1627 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1629 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1630 if (cu_info->bitmap[0][i][j] & mask) {
1631 if (counter == pg_always_on_cu_num)
1632 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1633 if (counter < always_on_cu_num)
1642 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1643 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1646 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1647 mutex_unlock(&adev->grbm_idx_mutex);
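/*
 * Program the RLC load-balancing (LBPW) thresholds, counters and CU
 * masks for the Raven-family IPs; gfx_v9_4_init_lbpw() below does the
 * same with different tuning values. The LOAD_BALANCE_ENABLE bit
 * itself is toggled separately in gfx_v9_0_enable_lbpw().
 */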
1650 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1654 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1655 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1656 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1657 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1658 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1660 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1661 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1663 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1664 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1666 mutex_lock(&adev->grbm_idx_mutex);
1667 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1668 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1669 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1671 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1672 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1673 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1674 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1675 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1677 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1678 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1681 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1684 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1685 * programmed in gfx_v9_0_init_always_on_cu_mask()
1688 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1689 * but used for RLC_LB_CNTL configuration */
1690 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1691 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1692 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1693 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1694 mutex_unlock(&adev->grbm_idx_mutex);
1696 gfx_v9_0_init_always_on_cu_mask(adev);
1699 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1703 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1704 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1705 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1706 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1707 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1709 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1710 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1712 /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1713 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1715 mutex_lock(&adev->grbm_idx_mutex);
1716 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1717 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1718 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1720 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1721 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1722 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1723 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1724 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1726 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1727 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1730 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1733 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1734 * programmed in gfx_v9_0_init_always_on_cu_mask()
1737 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1738 * but used for RLC_LB_CNTL configuration */
1739 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1740 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1741 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1742 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1743 mutex_unlock(&adev->grbm_idx_mutex);
1745 gfx_v9_0_init_always_on_cu_mask(adev);
1748 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1750 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1753 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1755 if (gfx_v9_0_load_mec2_fw_bin_support(adev))
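/*
 * Record the scratch and control register offsets used by the RLCG
 * indirect register access path, then mark RLCG register access as
 * supported for this device.
 */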
1761 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1763 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1765 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1766 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1767 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1768 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1769 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1770 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1771 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1772 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1773 adev->gfx.rlc.rlcg_reg_access_supported = true;
1776 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1778 const struct cs_section_def *cs_data;
1781 adev->gfx.rlc.cs_data = gfx9_cs_data;
1783 cs_data = adev->gfx.rlc.cs_data;
1786 /* init clear state block */
1787 r = amdgpu_gfx_rlc_init_csb(adev);
1792 if (adev->flags & AMD_IS_APU) {
1793 /* TODO: double check the cp_table_size for RV */
1794 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1795 r = amdgpu_gfx_rlc_init_cpt(adev);
1803 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1805 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1806 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
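/*
 * Allocate the MEC resources: a HPD EOP buffer sized from the number
 * of compute rings (VRAM or GTT) and a GTT buffer holding a copy of
 * the MEC firmware image.
 */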
1809 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1813 const __le32 *fw_data;
1816 size_t mec_hpd_size;
1818 const struct gfx_firmware_header_v1_0 *mec_hdr;
1820 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1822 /* take ownership of the relevant compute queues */
1823 amdgpu_gfx_compute_queue_acquire(adev);
1824 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1826 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1827 AMDGPU_GEM_DOMAIN_VRAM |
1828 AMDGPU_GEM_DOMAIN_GTT,
1829 &adev->gfx.mec.hpd_eop_obj,
1830 &adev->gfx.mec.hpd_eop_gpu_addr,
1833 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1834 gfx_v9_0_mec_fini(adev);
1838 memset(hpd, 0, mec_hpd_size);
1840 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1841 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1844 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1846 fw_data = (const __le32 *)
1847 (adev->gfx.mec_fw->data +
1848 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1849 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1851 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1852 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1853 &adev->gfx.mec.mec_fw_obj,
1854 &adev->gfx.mec.mec_fw_gpu_addr,
1857 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1858 gfx_v9_0_mec_fini(adev);
1862 memcpy(fw, fw_data, fw_size);
1864 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1865 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
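/*
 * Read one wave-indexed register: the wave, SIMD and register index
 * are packed into SQ_IND_INDEX with FORCE_READ set, and the value is
 * returned through SQ_IND_DATA.
 */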
1870 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1872 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1873 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1874 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1875 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1876 (SQ_IND_INDEX__FORCE_READ_MASK));
1877 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1880 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1881 uint32_t wave, uint32_t thread,
1882 uint32_t regno, uint32_t num, uint32_t *out)
1884 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1885 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1886 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1887 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1888 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1889 (SQ_IND_INDEX__FORCE_READ_MASK) |
1890 (SQ_IND_INDEX__AUTO_INCR_MASK));
1892 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1895 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1897 /* type 1 wave data */
1898 dst[(*no_fields)++] = 1;
1899 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1900 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1901 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1902 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1903 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1904 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1905 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1906 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1907 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1908 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1909 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1910 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1911 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1912 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1913 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1916 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1917 uint32_t wave, uint32_t start,
1918 uint32_t size, uint32_t *dst)
1921 adev, simd, wave, 0,
1922 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1925 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1926 uint32_t wave, uint32_t thread,
1927 uint32_t start, uint32_t size,
1931 adev, simd, wave, thread,
1932 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1935 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1936 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1938 soc15_grbm_select(adev, me, pipe, q, vm, 0);
1941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1942 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1943 .select_se_sh = &gfx_v9_0_select_se_sh,
1944 .read_wave_data = &gfx_v9_0_read_wave_data,
1945 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1946 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1947 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1950 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
1951 .ras_error_inject = &gfx_v9_0_ras_error_inject,
1952 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1953 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1956 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1958 .hw_ops = &gfx_v9_0_ras_ops,
1962 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1967 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1968 case IP_VERSION(9, 0, 1):
1969 adev->gfx.config.max_hw_contexts = 8;
1970 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1971 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1972 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1973 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1974 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1976 case IP_VERSION(9, 2, 1):
1977 adev->gfx.config.max_hw_contexts = 8;
1978 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1979 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1980 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1981 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1982 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1983 DRM_INFO("fix gfx.config for vega12\n");
1985 case IP_VERSION(9, 4, 0):
1986 adev->gfx.ras = &gfx_v9_0_ras;
1987 adev->gfx.config.max_hw_contexts = 8;
1988 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1992 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1993 gb_addr_config &= ~0xf3e777ff;
1994 gb_addr_config |= 0x22014042;
1995 /* check vbios table if gpu info is not available */
1996 err = amdgpu_atomfirmware_get_gfx_info(adev);
2000 case IP_VERSION(9, 2, 2):
2001 case IP_VERSION(9, 1, 0):
2002 adev->gfx.config.max_hw_contexts = 8;
2003 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2004 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2005 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2006 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2007 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2008 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2010 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2012 case IP_VERSION(9, 4, 1):
2013 adev->gfx.ras = &gfx_v9_4_ras;
2014 adev->gfx.config.max_hw_contexts = 8;
2015 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2016 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2017 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2018 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2019 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2020 gb_addr_config &= ~0xf3e777ff;
2021 gb_addr_config |= 0x22014042;
2023 case IP_VERSION(9, 3, 0):
2024 adev->gfx.config.max_hw_contexts = 8;
2025 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2026 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2027 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2028 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2029 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2030 gb_addr_config &= ~0xf3e777ff;
2031 gb_addr_config |= 0x22010042;
2033 case IP_VERSION(9, 4, 2):
2034 adev->gfx.ras = &gfx_v9_4_2_ras;
2035 adev->gfx.config.max_hw_contexts = 8;
2036 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041 gb_addr_config &= ~0xf3e777ff;
2042 gb_addr_config |= 0x22014042;
2043 /* check vbios table if gpu info is not available */
2044 err = amdgpu_atomfirmware_get_gfx_info(adev);
2053 adev->gfx.config.gb_addr_config = gb_addr_config;
2055 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2057 adev->gfx.config.gb_addr_config,
2061 adev->gfx.config.max_tile_pipes =
2062 adev->gfx.config.gb_addr_config_fields.num_pipes;
2064 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2066 adev->gfx.config.gb_addr_config,
2069 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2071 adev->gfx.config.gb_addr_config,
2073 MAX_COMPRESSED_FRAGS);
2074 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2076 adev->gfx.config.gb_addr_config,
2079 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2081 adev->gfx.config.gb_addr_config,
2083 NUM_SHADER_ENGINES);
2084 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2086 adev->gfx.config.gb_addr_config,
2088 PIPE_INTERLEAVE_SIZE));
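/*
 * Initialize one compute ring: me/pipe/queue addressing, a doorbell
 * index derived from mec_ring0, an EOP slot carved out of the shared
 * HPD buffer, and an EOP interrupt source selected from the ring's MEC
 * and pipe.
 */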
2093 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2094 int mec, int pipe, int queue)
2097 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2098 unsigned int hw_prio;
2100 ring = &adev->gfx.compute_ring[ring_id];
2105 ring->queue = queue;
2107 ring->ring_obj = NULL;
2108 ring->use_doorbell = true;
2109 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2110 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2111 + (ring_id * GFX9_MEC_HPD_SIZE);
2112 ring->vm_hub = AMDGPU_GFXHUB(0);
2113 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2115 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2116 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2118 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2119 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2120 /* type-2 packets are deprecated on MEC, use type-3 instead */
2121 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2125 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2127 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2131 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2133 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2134 adev->gfx.ip_dump_core = NULL;
2136 adev->gfx.ip_dump_core = ptr;
2139 /* Allocate memory for compute queue registers for all the instances */
2140 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2141 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2142 adev->gfx.mec.num_queue_per_pipe;
2144 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2146 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2147 adev->gfx.ip_dump_compute_queues = NULL;
2149 adev->gfx.ip_dump_compute_queues = ptr;
2153 static int gfx_v9_0_sw_init(void *handle)
2155 int i, j, k, r, ring_id;
2157 struct amdgpu_ring *ring;
2158 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2159 unsigned int hw_prio;
2161 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2162 case IP_VERSION(9, 0, 1):
2163 case IP_VERSION(9, 2, 1):
2164 case IP_VERSION(9, 4, 0):
2165 case IP_VERSION(9, 2, 2):
2166 case IP_VERSION(9, 1, 0):
2167 case IP_VERSION(9, 4, 1):
2168 case IP_VERSION(9, 3, 0):
2169 case IP_VERSION(9, 4, 2):
2170 adev->gfx.mec.num_mec = 2;
2173 adev->gfx.mec.num_mec = 1;
2177 adev->gfx.mec.num_pipe_per_mec = 4;
2178 adev->gfx.mec.num_queue_per_pipe = 8;
2181 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2185 /* Privileged reg */
2186 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2187 &adev->gfx.priv_reg_irq);
2191 /* Privileged inst */
2192 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2193 &adev->gfx.priv_inst_irq);
2198 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2199 &adev->gfx.cp_ecc_error_irq);
2204 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2205 &adev->gfx.cp_ecc_error_irq);
2209 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2211 if (adev->gfx.rlc.funcs) {
2212 if (adev->gfx.rlc.funcs->init) {
2213 r = adev->gfx.rlc.funcs->init(adev);
2215 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2221 r = gfx_v9_0_mec_init(adev);
2223 DRM_ERROR("Failed to init MEC BOs!\n");
2227 /* set up the gfx ring */
2228 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2229 ring = &adev->gfx.gfx_ring[i];
2230 ring->ring_obj = NULL;
2232 sprintf(ring->name, "gfx");
2234 sprintf(ring->name, "gfx_%d", i);
2235 ring->use_doorbell = true;
2236 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2238 /* disable scheduler on the real ring */
2239 ring->no_scheduler = adev->gfx.mcbp;
2240 ring->vm_hub = AMDGPU_GFXHUB(0);
2241 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2242 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2243 AMDGPU_RING_PRIO_DEFAULT, NULL);
2248 /* set up the software rings */
2249 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2250 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2251 ring = &adev->gfx.sw_gfx_ring[i];
2252 ring->ring_obj = NULL;
2253 sprintf(ring->name, amdgpu_sw_ring_name(i));
2254 ring->use_doorbell = true;
2255 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2256 ring->is_sw_ring = true;
2257 hw_prio = amdgpu_sw_ring_priority(i);
2258 ring->vm_hub = AMDGPU_GFXHUB(0);
2259 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2260 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2267 /* init the muxer and add software rings */
2268 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2269 GFX9_NUM_SW_GFX_RINGS);
2271 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2274 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2275 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2276 &adev->gfx.sw_gfx_ring[i]);
2278 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2284 /* set up the compute queues - allocate horizontally across pipes */
2286 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2287 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2288 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2289 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2293 r = gfx_v9_0_compute_ring_init(adev,
2304 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2306 DRM_ERROR("Failed to init KIQ BOs!\n");
2310 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2314 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2315 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2319 adev->gfx.ce_ram_size = 0x8000;
2321 r = gfx_v9_0_gpu_early_init(adev);
2325 if (amdgpu_gfx_ras_sw_init(adev)) {
2326 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2330 gfx_v9_0_alloc_ip_dump(adev);
2336 static int gfx_v9_0_sw_fini(void *handle)
2339 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2341 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2342 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2343 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2344 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2347 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2348 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2349 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2350 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2352 amdgpu_gfx_mqd_sw_fini(adev, 0);
2353 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2354 amdgpu_gfx_kiq_fini(adev, 0);
2356 gfx_v9_0_mec_fini(adev);
2357 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2358 &adev->gfx.rlc.clear_state_gpu_addr,
2359 (void **)&adev->gfx.rlc.cs_ptr);
2360 if (adev->flags & AMD_IS_APU) {
2361 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2362 &adev->gfx.rlc.cp_table_gpu_addr,
2363 (void **)&adev->gfx.rlc.cp_table_ptr);
2365 gfx_v9_0_free_microcode(adev);
2367 kfree(adev->gfx.ip_dump_core);
2368 kfree(adev->gfx.ip_dump_compute_queues);
2374 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2379 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2380 u32 instance, int xcc_id)
2384 if (instance == 0xffffffff)
2385 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2387 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2389 if (se_num == 0xffffffff)
2390 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2392 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2394 if (sh_num == 0xffffffff)
2395 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2397 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2399 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2402 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2406 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2407 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2409 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2410 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2412 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2413 adev->gfx.config.max_sh_per_se);
2415 return (~data) & mask;
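/*
 * Build the global active-RB bitmap by selecting each SE/SH in turn,
 * reading its backend-disable bits and packing the per-SH masks into
 * backend_enable_mask and num_rbs.
 */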
2418 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2423 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2424 adev->gfx.config.max_sh_per_se;
2426 mutex_lock(&adev->grbm_idx_mutex);
2427 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2428 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2429 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2430 data = gfx_v9_0_get_rb_active_bitmap(adev);
2431 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2432 rb_bitmap_width_per_sh);
2435 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2436 mutex_unlock(&adev->grbm_idx_mutex);
2438 adev->gfx.config.backend_enable_mask = active_rbs;
2439 adev->gfx.config.num_rbs = hweight32(active_rbs);
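/*
 * Configure the SPI debug trap for the given VMID range: a VMID_SEL
 * bitmask built from the requested VMIDs is written to
 * SPI_GDBG_TRAP_CONFIG, and the trap mask/data registers are cleared.
 */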
2442 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2443 uint32_t first_vmid,
2447 uint32_t trap_config_vmid_mask = 0;
2450 /* Calculate trap config vmid mask */
2451 for (i = first_vmid; i < last_vmid; i++)
2452 trap_config_vmid_mask |= (1 << i);
2454 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2455 VMID_SEL, trap_config_vmid_mask);
2456 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2458 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2459 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2461 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2462 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2465 #define DEFAULT_SH_MEM_BASES (0x6000)
2466 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2469 uint32_t sh_mem_config;
2470 uint32_t sh_mem_bases;
2473 * Configure apertures:
2474 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2475 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2476 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2478 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2480 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2481 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2482 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2484 mutex_lock(&adev->srbm_mutex);
2485 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2486 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2487 /* CP and shaders */
2488 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2489 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2491 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2492 mutex_unlock(&adev->srbm_mutex);
2494 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2495 access. These should be enabled by FW for target VMIDs. */
2496 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2497 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2498 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2499 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2500 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2504 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2509 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2510 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2511 * the driver can enable them for graphics. VMID0 should maintain
2512 * access so that HWS firmware can save/restore entries.
2514 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2515 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2516 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2517 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2518 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2522 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2526 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2527 case IP_VERSION(9, 4, 1):
2528 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2529 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2530 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2531 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2538 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2543 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2545 gfx_v9_0_tiling_mode_table_init(adev);
2547 if (adev->gfx.num_gfx_rings)
2548 gfx_v9_0_setup_rb(adev);
2549 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2550 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2552 /* XXX SH_MEM regs */
2553 /* where to put LDS, scratch, GPUVM in FSA64 space */
2554 mutex_lock(&adev->srbm_mutex);
2555 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2556 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2557 /* CP and shaders */
2559 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2560 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2561 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2562 !!adev->gmc.noretry);
2563 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2564 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2566 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2567 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2568 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2569 !!adev->gmc.noretry);
2570 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2571 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2572 (adev->gmc.private_aperture_start >> 48));
2573 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2574 (adev->gmc.shared_aperture_start >> 48));
2575 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2578 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2580 mutex_unlock(&adev->srbm_mutex);
2582 gfx_v9_0_init_compute_vmid(adev);
2583 gfx_v9_0_init_gds_vmid(adev);
2584 gfx_v9_0_init_sq_config(adev);
2587 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2592 mutex_lock(&adev->grbm_idx_mutex);
2593 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2594 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2595 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2596 for (k = 0; k < adev->usec_timeout; k++) {
2597 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2601 if (k == adev->usec_timeout) {
2602 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2603 0xffffffff, 0xffffffff, 0);
2604 mutex_unlock(&adev->grbm_idx_mutex);
2605 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2611 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2612 mutex_unlock(&adev->grbm_idx_mutex);
2614 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2615 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2616 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2617 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2618 for (k = 0; k < adev->usec_timeout; k++) {
2619 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2625 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2630 /* These interrupts should be enabled to drive DS clock */
2632 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2634 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2635 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2636 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2637 if (adev->gfx.num_gfx_rings)
2638 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2640 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2643 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2645 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2647 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2648 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2649 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2650 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2651 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2652 adev->gfx.rlc.clear_state_size);
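/*
 * Walk the RLC register_list_format blob from indirect_offset onward:
 * record where each indirect block starts and collect the unique
 * indirect register offsets it references, for later programming of
 * the save/restore list into RLC scratch RAM.
 */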
2655 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2656 int indirect_offset,
2658 int *unique_indirect_regs,
2659 int unique_indirect_reg_count,
2660 int *indirect_start_offsets,
2661 int *indirect_start_offsets_count,
2662 int max_start_offsets_count)
2666 for (; indirect_offset < list_size; indirect_offset++) {
2667 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2668 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2669 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2671 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2672 indirect_offset += 2;
2674 /* look for the matching index */
2675 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2676 if (unique_indirect_regs[idx] ==
2677 register_list_format[indirect_offset] ||
2678 !unique_indirect_regs[idx])
2682 BUG_ON(idx >= unique_indirect_reg_count);
2684 if (!unique_indirect_regs[idx])
2685 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2692 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2694 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2695 int unique_indirect_reg_count = 0;
2697 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2698 int indirect_start_offsets_count = 0;
2704 u32 *register_list_format =
2705 kmemdup(adev->gfx.rlc.register_list_format,
2706 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2707 if (!register_list_format)
2710 /* setup unique_indirect_regs array and indirect_start_offsets array */
2711 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2712 gfx_v9_1_parse_ind_reg_list(register_list_format,
2713 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2714 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2715 unique_indirect_regs,
2716 unique_indirect_reg_count,
2717 indirect_start_offsets,
2718 &indirect_start_offsets_count,
2719 ARRAY_SIZE(indirect_start_offsets));
2721 /* enable auto inc in case it is disabled */
2722 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2723 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2724 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2726 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2727 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2728 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2729 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2730 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2731 adev->gfx.rlc.register_restore[i]);
2733 /* load indirect register */
2734 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2735 adev->gfx.rlc.reg_list_format_start);
2737 /* direct register portion */
2738 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2739 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2740 register_list_format[i]);
2742 /* indirect register portion */
2743 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2744 if (register_list_format[i] == 0xFFFFFFFF) {
2745 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2749 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2750 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2752 for (j = 0; j < unique_indirect_reg_count; j++) {
2753 if (register_list_format[i] == unique_indirect_regs[j]) {
2754 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2759 BUG_ON(j >= unique_indirect_reg_count);
2764 /* set save/restore list size */
2765 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2766 list_size = list_size >> 1;
2767 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2768 adev->gfx.rlc.reg_restore_list_size);
2769 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2771 /* write the starting offsets to RLC scratch ram */
2772 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2773 adev->gfx.rlc.starting_offsets_start);
2774 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2775 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2776 indirect_start_offsets[i]);
2778 /* load unique indirect regs */
2779 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2780 if (unique_indirect_regs[i] != 0) {
2781 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2782 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2783 unique_indirect_regs[i] & 0x3FFFF);
2785 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2786 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2787 unique_indirect_regs[i] >> 20);
2791 kfree(register_list_format);
2795 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2797 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2800 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2804 uint32_t default_data = 0;
2806 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2808 /* enable GFXIP control over CGPG */
2809 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2810 if (default_data != data)
2811 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2814 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2815 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2816 if (default_data != data)
2817 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2819 /* restore GFXIP control over CGPG */
2820 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2821 if (default_data != data)
2822 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2826 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2830 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2831 AMD_PG_SUPPORT_GFX_SMG |
2832 AMD_PG_SUPPORT_GFX_DMG)) {
2833 /* init IDLE_POLL_COUNT = 60 */
2834 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2835 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2836 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2837 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2839 /* init RLC PG Delay */
2841 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2842 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2843 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2844 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2845 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2847 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2848 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2849 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2850 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2852 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2853 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2854 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2855 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2857 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2858 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2860 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2861 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2862 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2863 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2864 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2868 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2872 uint32_t default_data = 0;
2874 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2875 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2876 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2878 if (default_data != data)
2879 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2882 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2886 uint32_t default_data = 0;
2888 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2889 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2890 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2892 if (default_data != data)
2893 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2896 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2900 uint32_t default_data = 0;
2902 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2903 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2906 if (default_data != data)
2907 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2910 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2913 uint32_t data, default_data;
2915 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2916 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2917 GFX_POWER_GATING_ENABLE,
2919 if (default_data != data)
2920 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2923 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2926 uint32_t data, default_data;
2928 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2929 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2930 GFX_PIPELINE_PG_ENABLE,
2932 if (default_data != data)
2933 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2936 /* read any GFX register to wake up GFX */
2937 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2940 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2943 uint32_t data, default_data;
2945 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2946 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2947 STATIC_PER_CU_PG_ENABLE,
2949 if (default_data != data)
2950 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2953 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2956 uint32_t data, default_data;
2958 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2959 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2960 DYN_PER_CU_PG_ENABLE,
2962 if (default_data != data)
2963 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2966 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2968 gfx_v9_0_init_csb(adev);
2971 * The RLC save/restore list has been usable since v2_1,
2972 * and it is needed by the gfxoff feature.
2974 if (adev->gfx.rlc.is_rlc_v2_1) {
2975 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2976 IP_VERSION(9, 2, 1) ||
2977 (adev->apu_flags & AMD_APU_IS_RAVEN2))
2978 gfx_v9_1_init_rlc_save_restore_list(adev);
2979 gfx_v9_0_enable_save_restore_machine(adev);
2982 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2983 AMD_PG_SUPPORT_GFX_SMG |
2984 AMD_PG_SUPPORT_GFX_DMG |
2986 AMD_PG_SUPPORT_GDS |
2987 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2988 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2989 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2990 gfx_v9_0_init_gfx_power_gating(adev);
2994 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2996 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2997 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2998 gfx_v9_0_wait_for_rlc_serdes(adev);
3001 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3003 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3005 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3009 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3011 #ifdef AMDGPU_RLC_DEBUG_RETRY
3015 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3018 /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
3019 if (!(adev->flags & AMD_IS_APU)) {
3020 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3024 #ifdef AMDGPU_RLC_DEBUG_RETRY
3025 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3026 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3027 if (rlc_ucode_ver == 0x108) {
3028 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3029 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3030 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3031 * default is 0x9C4 to create a 100us interval */
3032 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3033 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3034 * to disable the page fault retry interrupts, default is
3036 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3041 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3043 const struct rlc_firmware_header_v2_0 *hdr;
3044 const __le32 *fw_data;
3045 unsigned i, fw_size;
3047 if (!adev->gfx.rlc_fw)
3050 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3051 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3053 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3054 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3055 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3057 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3058 RLCG_UCODE_LOADING_START_ADDRESS);
3059 for (i = 0; i < fw_size; i++)
3060 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3061 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3066 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3070 if (amdgpu_sriov_vf(adev)) {
3071 gfx_v9_0_init_csb(adev);
3075 adev->gfx.rlc.funcs->stop(adev);
3078 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3080 gfx_v9_0_init_pg(adev);
3082 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3083 /* legacy rlc firmware loading */
3084 r = gfx_v9_0_rlc_load_microcode(adev);
3089 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3090 case IP_VERSION(9, 2, 2):
3091 case IP_VERSION(9, 1, 0):
3092 gfx_v9_0_init_lbpw(adev);
3093 if (amdgpu_lbpw == 0)
3094 gfx_v9_0_enable_lbpw(adev, false);
3096 gfx_v9_0_enable_lbpw(adev, true);
3098 case IP_VERSION(9, 4, 0):
3099 gfx_v9_4_init_lbpw(adev);
3100 if (amdgpu_lbpw > 0)
3101 gfx_v9_0_enable_lbpw(adev, true);
3103 gfx_v9_0_enable_lbpw(adev, false);
3109 gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3111 adev->gfx.rlc.funcs->start(adev);
3116 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3118 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3120 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3121 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3122 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3123 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
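/*
 * Legacy (non-PSP) CP gfx microcode load: halt the CP, then stream the
 * PFP, CE and ME images into their ucode data ports one dword at a
 * time, finishing each by writing the firmware version to the address
 * register.
 */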
3127 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3129 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3130 const struct gfx_firmware_header_v1_0 *ce_hdr;
3131 const struct gfx_firmware_header_v1_0 *me_hdr;
3132 const __le32 *fw_data;
3133 unsigned i, fw_size;
3135 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3138 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3139 adev->gfx.pfp_fw->data;
3140 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3141 adev->gfx.ce_fw->data;
3142 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3143 adev->gfx.me_fw->data;
3145 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3146 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3147 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3149 gfx_v9_0_cp_gfx_enable(adev, false);
3152 fw_data = (const __le32 *)
3153 (adev->gfx.pfp_fw->data +
3154 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3155 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3156 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3157 for (i = 0; i < fw_size; i++)
3158 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3159 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3162 fw_data = (const __le32 *)
3163 (adev->gfx.ce_fw->data +
3164 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3165 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3166 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3167 for (i = 0; i < fw_size; i++)
3168 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3169 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3172 fw_data = (const __le32 *)
3173 (adev->gfx.me_fw->data +
3174 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3175 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3176 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3177 for (i = 0; i < fw_size; i++)
3178 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3179 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3184 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3186 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3187 const struct cs_section_def *sect = NULL;
3188 const struct cs_extent_def *ext = NULL;
3192 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3193 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3195 gfx_v9_0_cp_gfx_enable(adev, true);
3197 /* For now, only apply this quirk to the gfx9 APU series; it has already been
3198 * confirmed that the gfx10/gfx11 APUs do not need such an update.
3200 if (adev->flags & AMD_IS_APU &&
3201 adev->in_s3 && !adev->suspend_complete) {
3202 DRM_INFO(" Will skip the CSB packet resubmit\n");
3205 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3207 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3211 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3212 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3214 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3215 amdgpu_ring_write(ring, 0x80000000);
3216 amdgpu_ring_write(ring, 0x80000000);
3218 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3219 for (ext = sect->section; ext->extent != NULL; ++ext) {
3220 if (sect->id == SECT_CONTEXT) {
3221 amdgpu_ring_write(ring,
3222 PACKET3(PACKET3_SET_CONTEXT_REG,
3224 amdgpu_ring_write(ring,
3225 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3226 for (i = 0; i < ext->reg_count; i++)
3227 amdgpu_ring_write(ring, ext->extent[i]);
3232 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3233 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3235 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3236 amdgpu_ring_write(ring, 0);
3238 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3239 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3240 amdgpu_ring_write(ring, 0x8000);
3241 amdgpu_ring_write(ring, 0x8000);
3243 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3244 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3245 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3246 amdgpu_ring_write(ring, tmp);
3247 amdgpu_ring_write(ring, 0);
3249 amdgpu_ring_commit(ring);
3254 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3256 struct amdgpu_ring *ring;
3259 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3261 /* Set the write pointer delay */
3262 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3264 /* set the RB to use vmid 0 */
3265 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3267 /* Set ring buffer size */
3268 ring = &adev->gfx.gfx_ring[0];
3269 rb_bufsz = order_base_2(ring->ring_size / 8);
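/* rb_bufsz is a log2 encoding written into CP_RB0_CNTL below; for example,
 * a hypothetical 4 KiB ring gives order_base_2(4096 / 8) = 9
 */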
3270 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3271 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3273 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3275 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3277 /* Initialize the ring buffer's write pointers */
3279 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3280 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3282 /* set the wb address whether it's enabled or not */
3283 rptr_addr = ring->rptr_gpu_addr;
3284 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3285 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3287 wptr_gpu_addr = ring->wptr_gpu_addr;
3288 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3289 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3292 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3294 rb_addr = ring->gpu_addr >> 8;
3295 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3296 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3298 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3299 if (ring->use_doorbell) {
3300 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3301 DOORBELL_OFFSET, ring->doorbell_index);
3302 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3305 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3307 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3309 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3310 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3311 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3313 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3314 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3317 /* start the ring */
3318 gfx_v9_0_cp_gfx_start(adev);
3323 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3326 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3328 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3329 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3330 adev->gfx.kiq[0].ring.sched.ready = false;
3335 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3337 const struct gfx_firmware_header_v1_0 *mec_hdr;
3338 const __le32 *fw_data;
3342 if (!adev->gfx.mec_fw)
3345 gfx_v9_0_cp_compute_enable(adev, false);
3347 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3348 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3350 fw_data = (const __le32 *)
3351 (adev->gfx.mec_fw->data +
3352 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3354 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3355 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3356 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3358 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3359 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3360 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3361 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
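/* the MEC ucode proper is fetched by CPC from mec_fw_gpu_addr set above;
 * only the jump table entries are written through the UCODE_DATA port below
 */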
3364 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3365 mec_hdr->jt_offset);
3366 for (i = 0; i < mec_hdr->jt_size; i++)
3367 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3368 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3370 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3371 adev->gfx.mec_fw_version);
3372 /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3378 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3381 struct amdgpu_device *adev = ring->adev;
3383 /* tell the RLC which queue is the KIQ */
3384 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3386 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
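/* the low byte of RLC_CP_SCHEDULERS selects the KIQ: queue id in bits [2:0],
 * pipe in bits [4:3], ME in bit 5 (matching the shifts above)
 */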
3387 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3389 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3392 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3394 struct amdgpu_device *adev = ring->adev;
3396 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3397 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3398 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3399 mqd->cp_hqd_queue_priority =
3400 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3405 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3407 struct amdgpu_device *adev = ring->adev;
3408 struct v9_mqd *mqd = ring->mqd_ptr;
3409 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3412 mqd->header = 0xC0310800;
3413 mqd->compute_pipelinestat_enable = 0x00000001;
3414 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3415 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3416 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3417 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3418 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3419 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3420 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3421 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3422 mqd->compute_misc_reserved = 0x00000003;
3424 mqd->dynamic_cu_mask_addr_lo =
3425 lower_32_bits(ring->mqd_gpu_addr
3426 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3427 mqd->dynamic_cu_mask_addr_hi =
3428 upper_32_bits(ring->mqd_gpu_addr
3429 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3431 eop_base_addr = ring->eop_gpu_addr >> 8;
3432 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3433 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3435 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3436 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3437 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3438 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
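/* with GFX9_MEC_HPD_SIZE = 4096 bytes (1024 dwords), EOP_SIZE is
 * order_base_2(1024) - 1 = 9, i.e. 2^(9+1) = 1024 dwords
 */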
3440 mqd->cp_hqd_eop_control = tmp;
3442 /* enable doorbell? */
3443 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3445 if (ring->use_doorbell) {
3446 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3447 DOORBELL_OFFSET, ring->doorbell_index);
3448 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3450 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3451 DOORBELL_SOURCE, 0);
3452 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3455 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3459 mqd->cp_hqd_pq_doorbell_control = tmp;
3461 /* disable the queue if it's active */
3463 mqd->cp_hqd_dequeue_request = 0;
3464 mqd->cp_hqd_pq_rptr = 0;
3465 mqd->cp_hqd_pq_wptr_lo = 0;
3466 mqd->cp_hqd_pq_wptr_hi = 0;
3468 /* set the pointer to the MQD */
3469 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3470 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3472 /* set MQD vmid to 0 */
3473 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3474 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3475 mqd->cp_mqd_control = tmp;
3477 /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3478 hqd_gpu_addr = ring->gpu_addr >> 8;
3479 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3480 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3482 /* set up the HQD, this is similar to CP_RB0_CNTL */
3483 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3484 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3485 (order_base_2(ring->ring_size / 4) - 1));
3486 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3487 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3489 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3491 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3492 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3493 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3494 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3495 mqd->cp_hqd_pq_control = tmp;
3497 /* set the wb address whether it's enabled or not */
3498 wb_gpu_addr = ring->rptr_gpu_addr;
3499 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3500 mqd->cp_hqd_pq_rptr_report_addr_hi =
3501 upper_32_bits(wb_gpu_addr) & 0xffff;
3503 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3504 wb_gpu_addr = ring->wptr_gpu_addr;
3505 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3506 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3508 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3510 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3512 /* set the vmid for the queue */
3513 mqd->cp_hqd_vmid = 0;
3515 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3516 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3517 mqd->cp_hqd_persistent_state = tmp;
3519 /* set MIN_IB_AVAIL_SIZE */
3520 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3521 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3522 mqd->cp_hqd_ib_control = tmp;
3524 /* set static priority for a queue/ring */
3525 gfx_v9_0_mqd_set_priority(ring, mqd);
3526 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3528 /* the map_queues packet doesn't need to activate the queue,
3529 * so only the KIQ needs to set this field.
3531 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3532 mqd->cp_hqd_active = 1;
3537 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3539 struct amdgpu_device *adev = ring->adev;
3540 struct v9_mqd *mqd = ring->mqd_ptr;
3543 /* disable wptr polling */
3544 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3546 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3547 mqd->cp_hqd_eop_base_addr_lo);
3548 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3549 mqd->cp_hqd_eop_base_addr_hi);
3551 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3552 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3553 mqd->cp_hqd_eop_control);
3555 /* enable doorbell? */
3556 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3557 mqd->cp_hqd_pq_doorbell_control);
3559 /* disable the queue if it's active */
3560 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3561 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3562 for (j = 0; j < adev->usec_timeout; j++) {
3563 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3567 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3568 mqd->cp_hqd_dequeue_request);
3569 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3570 mqd->cp_hqd_pq_rptr);
3571 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3572 mqd->cp_hqd_pq_wptr_lo);
3573 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3574 mqd->cp_hqd_pq_wptr_hi);
3577 /* set the pointer to the MQD */
3578 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3579 mqd->cp_mqd_base_addr_lo);
3580 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3581 mqd->cp_mqd_base_addr_hi);
3583 /* set MQD vmid to 0 */
3584 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3585 mqd->cp_mqd_control);
3587 /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3588 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3589 mqd->cp_hqd_pq_base_lo);
3590 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3591 mqd->cp_hqd_pq_base_hi);
3593 /* set up the HQD, this is similar to CP_RB0_CNTL */
3594 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3595 mqd->cp_hqd_pq_control);
3597 /* set the wb address whether it's enabled or not */
3598 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3599 mqd->cp_hqd_pq_rptr_report_addr_lo);
3600 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3601 mqd->cp_hqd_pq_rptr_report_addr_hi);
3603 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3604 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3605 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3606 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3607 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3609 /* enable the doorbell if requested */
3610 if (ring->use_doorbell) {
3611 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3612 (adev->doorbell_index.kiq * 2) << 2);
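/* adev->doorbell_index values appear to be in 64-bit doorbell units, so the
 * "* 2" converts to a 32-bit index and the "<< 2" to a byte offset; this is
 * an assumption inferred from how the index is scaled here and below
 */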
3613 /* If GC has entered CGPG, ringing a doorbell beyond the first page
3614 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3615 * work around this issue; this change has to align with the firmware
3618 if (check_if_enlarge_doorbell_range(adev))
3619 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3620 (adev->doorbell.size - 4));
3622 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3623 (adev->doorbell_index.userqueue_end * 2) << 2);
3626 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3627 mqd->cp_hqd_pq_doorbell_control);
3629 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3630 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3631 mqd->cp_hqd_pq_wptr_lo);
3632 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3633 mqd->cp_hqd_pq_wptr_hi);
3635 /* set the vmid for the queue */
3636 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3638 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3639 mqd->cp_hqd_persistent_state);
3641 /* activate the queue */
3642 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3643 mqd->cp_hqd_active);
3645 if (ring->use_doorbell)
3646 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3651 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3653 struct amdgpu_device *adev = ring->adev;
3656 /* disable the queue if it's active */
3657 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3659 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3661 for (j = 0; j < adev->usec_timeout; j++) {
3662 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3667 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3668 DRM_DEBUG("KIQ dequeue request failed.\n");
3670 /* Manual disable if dequeue request times out */
3671 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3674 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3678 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3679 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3680 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3682 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3683 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3685 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3690 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3692 struct amdgpu_device *adev = ring->adev;
3693 struct v9_mqd *mqd = ring->mqd_ptr;
3694 struct v9_mqd *tmp_mqd;
3696 gfx_v9_0_kiq_setting(ring);
3698 /* The GPU could be in a bad state during probe; the driver triggers a reset
3699 * after loading the SMU, and in that case the MQD has not been initialized,
3700 * so the driver needs to re-init the MQD.
3701 * Check mqd->cp_hqd_pq_control since this value should not be 0
3703 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3704 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3705 /* for GPU_RESET case , reset MQD to a clean status */
3706 if (adev->gfx.kiq[0].mqd_backup)
3707 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3709 /* reset ring buffer */
3711 amdgpu_ring_clear_ring(ring);
3713 mutex_lock(&adev->srbm_mutex);
3714 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3715 gfx_v9_0_kiq_init_register(ring);
3716 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3717 mutex_unlock(&adev->srbm_mutex);
3719 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3720 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3721 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3722 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3723 amdgpu_ring_clear_ring(ring);
3724 mutex_lock(&adev->srbm_mutex);
3725 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3726 gfx_v9_0_mqd_init(ring);
3727 gfx_v9_0_kiq_init_register(ring);
3728 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3729 mutex_unlock(&adev->srbm_mutex);
3731 if (adev->gfx.kiq[0].mqd_backup)
3732 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3738 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3740 struct amdgpu_device *adev = ring->adev;
3741 struct v9_mqd *mqd = ring->mqd_ptr;
3742 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3743 struct v9_mqd *tmp_mqd;
3745 /* Same as the KIQ init above: the driver needs to re-init the MQD if
3746 * mqd->cp_hqd_pq_control has not been initialized before
3748 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3750 if (!tmp_mqd->cp_hqd_pq_control ||
3751 (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3752 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3753 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3754 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3755 mutex_lock(&adev->srbm_mutex);
3756 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3757 gfx_v9_0_mqd_init(ring);
3758 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3759 mutex_unlock(&adev->srbm_mutex);
3761 if (adev->gfx.mec.mqd_backup[mqd_idx])
3762 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3764 /* restore MQD to a clean status */
3765 if (adev->gfx.mec.mqd_backup[mqd_idx])
3766 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3767 /* reset ring buffer */
3769 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3770 amdgpu_ring_clear_ring(ring);
3776 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3778 struct amdgpu_ring *ring;
3781 ring = &adev->gfx.kiq[0].ring;
3783 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3784 if (unlikely(r != 0))
3787 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3788 if (unlikely(r != 0)) {
3789 amdgpu_bo_unreserve(ring->mqd_obj);
3793 gfx_v9_0_kiq_init_queue(ring);
3794 amdgpu_bo_kunmap(ring->mqd_obj);
3795 ring->mqd_ptr = NULL;
3796 amdgpu_bo_unreserve(ring->mqd_obj);
3800 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3802 struct amdgpu_ring *ring = NULL;
3805 gfx_v9_0_cp_compute_enable(adev, true);
3807 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3808 ring = &adev->gfx.compute_ring[i];
3810 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3811 if (unlikely(r != 0))
3813 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3815 r = gfx_v9_0_kcq_init_queue(ring);
3816 amdgpu_bo_kunmap(ring->mqd_obj);
3817 ring->mqd_ptr = NULL;
3819 amdgpu_bo_unreserve(ring->mqd_obj);
3824 r = amdgpu_gfx_enable_kcq(adev, 0);
3829 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3832 struct amdgpu_ring *ring;
3834 if (!(adev->flags & AMD_IS_APU))
3835 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
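/* Bring-up order below: optionally load CP microcode (non-PSP path), resume
 * the KIQ first, then the gfx ring, then map the compute queues (KCQs)
 * through the KIQ, and finally run the ring tests.
 */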
3837 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3838 if (adev->gfx.num_gfx_rings) {
3839 /* legacy firmware loading */
3840 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3845 r = gfx_v9_0_cp_compute_load_microcode(adev);
3850 r = gfx_v9_0_kiq_resume(adev);
3854 if (adev->gfx.num_gfx_rings) {
3855 r = gfx_v9_0_cp_gfx_resume(adev);
3860 r = gfx_v9_0_kcq_resume(adev);
3864 if (adev->gfx.num_gfx_rings) {
3865 ring = &adev->gfx.gfx_ring[0];
3866 r = amdgpu_ring_test_helper(ring);
3871 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3872 ring = &adev->gfx.compute_ring[i];
3873 amdgpu_ring_test_helper(ring);
3876 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3881 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3885 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3886 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3889 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3890 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3891 adev->df.hash_status.hash_64k);
3892 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3893 adev->df.hash_status.hash_2m);
3894 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3895 adev->df.hash_status.hash_1g);
3896 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3899 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3901 if (adev->gfx.num_gfx_rings)
3902 gfx_v9_0_cp_gfx_enable(adev, enable);
3903 gfx_v9_0_cp_compute_enable(adev, enable);
3906 static int gfx_v9_0_hw_init(void *handle)
3909 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3911 if (!amdgpu_sriov_vf(adev))
3912 gfx_v9_0_init_golden_registers(adev);
3914 gfx_v9_0_constants_init(adev);
3916 gfx_v9_0_init_tcp_config(adev);
3918 r = adev->gfx.rlc.funcs->resume(adev);
3922 r = gfx_v9_0_cp_resume(adev);
3926 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
3927 gfx_v9_4_2_set_power_brake_sequence(adev);
3932 static int gfx_v9_0_hw_fini(void *handle)
3934 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3936 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3937 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3938 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3939 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3941 /* DF freeze and kcq disable will fail */
3942 if (!amdgpu_ras_intr_triggered())
3943 /* disable KCQ to avoid CPC touch memory not valid anymore */
3944 amdgpu_gfx_disable_kcq(adev, 0);
3946 if (amdgpu_sriov_vf(adev)) {
3947 gfx_v9_0_cp_gfx_enable(adev, false);
3948 /* must disable polling for SRIOV when hw is finished, otherwise the
3949 * CPC engine may keep fetching a WB address which is already
3950 * invalid after sw is finished, and trigger a DMAR read error in
3953 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3957 /* Use the deinitialize sequence from CAIL when unbinding the device from the
3958 * driver, otherwise the KIQ hangs when binding it back
3960 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3961 mutex_lock(&adev->srbm_mutex);
3962 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
3963 adev->gfx.kiq[0].ring.pipe,
3964 adev->gfx.kiq[0].ring.queue, 0, 0);
3965 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
3966 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3967 mutex_unlock(&adev->srbm_mutex);
3970 gfx_v9_0_cp_enable(adev, false);
3972 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3973 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3974 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
3975 dev_dbg(adev->dev, "Skipping RLC halt\n");
3979 adev->gfx.rlc.funcs->stop(adev);
3983 static int gfx_v9_0_suspend(void *handle)
3985 return gfx_v9_0_hw_fini(handle);
3988 static int gfx_v9_0_resume(void *handle)
3990 return gfx_v9_0_hw_init(handle);
3993 static bool gfx_v9_0_is_idle(void *handle)
3995 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3997 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3998 GRBM_STATUS, GUI_ACTIVE))
4004 static int gfx_v9_0_wait_for_idle(void *handle)
4007 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4009 for (i = 0; i < adev->usec_timeout; i++) {
4010 if (gfx_v9_0_is_idle(handle))
4017 static int gfx_v9_0_soft_reset(void *handle)
4019 u32 grbm_soft_reset = 0;
4021 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4024 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4025 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4026 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4027 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4028 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4029 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4030 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4031 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4032 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4033 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4034 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4037 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4038 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4039 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4043 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4044 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4045 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4046 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4049 if (grbm_soft_reset) {
4051 adev->gfx.rlc.funcs->stop(adev);
4053 if (adev->gfx.num_gfx_rings)
4054 /* Disable GFX parsing/prefetching */
4055 gfx_v9_0_cp_gfx_enable(adev, false);
4057 /* Disable MEC parsing/prefetching */
4058 gfx_v9_0_cp_compute_enable(adev, false);
4060 if (grbm_soft_reset) {
4061 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4062 tmp |= grbm_soft_reset;
4063 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4064 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4065 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4069 tmp &= ~grbm_soft_reset;
4070 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4071 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4074 /* Wait a little for things to settle down */
4080 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4082 signed long r, cnt = 0;
4083 unsigned long flags;
4084 uint32_t seq, reg_val_offs = 0;
4086 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4087 struct amdgpu_ring *ring = &kiq->ring;
4089 BUG_ON(!ring->funcs->emit_rreg);
4091 spin_lock_irqsave(&kiq->ring_lock, flags);
4092 if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4093 pr_err("critical bug! too many kiq readers\n");
4096 amdgpu_ring_alloc(ring, 32);
4097 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4098 amdgpu_ring_write(ring, 9 | /* src: register*/
4099 (5 << 8) | /* dst: memory */
4100 (1 << 16) | /* count sel */
4101 (1 << 20)); /* write confirm */
4102 amdgpu_ring_write(ring, 0);
4103 amdgpu_ring_write(ring, 0);
4104 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4106 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4108 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4112 amdgpu_ring_commit(ring);
4113 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4115 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4117 /* don't wait any longer in the gpu reset case because waiting here may
4118 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
4119 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4120 * never return if we keep waiting in virt_kiq_rreg, which causes
4121 * gpu_recover() to hang there.
4123 * also don't wait any longer in IRQ context
4125 if (r < 1 && (amdgpu_in_reset(adev)))
4126 goto failed_kiq_read;
4129 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4130 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4131 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4134 if (cnt > MAX_KIQ_REG_TRY)
4135 goto failed_kiq_read;
4138 value = (uint64_t)adev->wb.wb[reg_val_offs] |
4139 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4140 amdgpu_device_wb_free(adev, reg_val_offs);
4144 amdgpu_ring_undo(ring);
4146 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4149 amdgpu_device_wb_free(adev, reg_val_offs);
4150 pr_err("failed to read gpu clock\n");
4154 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4156 uint64_t clock, clock_lo, clock_hi, hi_check;
4158 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4159 case IP_VERSION(9, 3, 0):
4161 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4162 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4163 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4164 /* The SMUIO TSC clock frequency is 100 MHz, so the 32-bit low word carries over
4165 * roughly every 42 seconds.
4167 if (hi_check != clock_hi) {
4168 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4169 clock_hi = hi_check;
4172 clock = clock_lo | (clock_hi << 32ULL);
4175 amdgpu_gfx_off_ctrl(adev, false);
4176 mutex_lock(&adev->gfx.gpu_clock_mutex);
4177 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4178 IP_VERSION(9, 0, 1) &&
4179 amdgpu_sriov_runtime(adev)) {
4180 clock = gfx_v9_0_kiq_read_clock(adev);
4182 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4183 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4184 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4186 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4187 amdgpu_gfx_off_ctrl(adev, true);
4193 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4195 uint32_t gds_base, uint32_t gds_size,
4196 uint32_t gws_base, uint32_t gws_size,
4197 uint32_t oa_base, uint32_t oa_size)
4199 struct amdgpu_device *adev = ring->adev;
4202 gfx_v9_0_write_data_to_reg(ring, 0, false,
4203 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4207 gfx_v9_0_write_data_to_reg(ring, 0, false,
4208 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4212 gfx_v9_0_write_data_to_reg(ring, 0, false,
4213 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4214 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
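/* the OA (ordered append) allocation is programmed as a contiguous bitmask:
 * (1 << (oa_size + oa_base)) - (1 << oa_base); e.g. oa_base = 4, oa_size = 4
 * yields 0x000000f0
 */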
4217 gfx_v9_0_write_data_to_reg(ring, 0, false,
4218 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4219 (1 << (oa_size + oa_base)) - (1 << oa_base));
4222 static const u32 vgpr_init_compute_shader[] =
4224 0xb07c0000, 0xbe8000ff,
4225 0x000000f8, 0xbf110800,
4226 0x7e000280, 0x7e020280,
4227 0x7e040280, 0x7e060280,
4228 0x7e080280, 0x7e0a0280,
4229 0x7e0c0280, 0x7e0e0280,
4230 0x80808800, 0xbe803200,
4231 0xbf84fff5, 0xbf9c0000,
4232 0xd28c0001, 0x0001007f,
4233 0xd28d0001, 0x0002027e,
4234 0x10020288, 0xb8810904,
4235 0xb7814000, 0xd1196a01,
4236 0x00000301, 0xbe800087,
4237 0xbefc00c1, 0xd89c4000,
4238 0x00020201, 0xd89cc080,
4239 0x00040401, 0x320202ff,
4240 0x00000800, 0x80808100,
4241 0xbf84fff8, 0x7e020280,
4242 0xbf810000, 0x00000000,
4245 static const u32 sgpr_init_compute_shader[] =
4247 0xb07c0000, 0xbe8000ff,
4248 0x0000005f, 0xbee50080,
4249 0xbe812c65, 0xbe822c65,
4250 0xbe832c65, 0xbe842c65,
4251 0xbe852c65, 0xb77c0005,
4252 0x80808500, 0xbf84fff8,
4253 0xbe800080, 0xbf810000,
4256 static const u32 vgpr_init_compute_shader_arcturus[] = {
4257 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4258 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4259 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4260 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4261 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4262 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4263 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4264 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4265 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4266 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4267 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4268 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4269 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4270 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4271 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4272 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4273 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4274 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4275 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4276 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4277 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4278 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4279 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4280 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4281 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4282 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4283 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4284 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4285 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4286 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4287 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4288 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4289 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4290 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4291 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4292 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4293 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4294 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4295 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4296 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4297 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4298 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4299 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4300 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4301 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4302 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4303 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4304 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4305 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4306 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4307 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4308 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4309 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4310 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4311 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4312 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4313 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4314 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4315 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4316 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4317 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4318 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4319 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4320 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4321 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4322 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4323 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4324 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4325 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4326 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4327 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4328 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4329 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4330 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4331 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4332 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4333 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4334 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4335 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4336 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4337 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4338 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4339 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4340 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4341 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4342 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4343 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4344 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4345 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4346 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4347 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4348 0xbf84fff8, 0xbf810000,
4351 /* When the register arrays below are changed, please update gpr_reg_size
4352 and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4353 so that all gfx9 ASICs are covered */
4354 static const struct soc15_reg_entry vgpr_init_regs[] = {
4355 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4356 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4357 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4358 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4359 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4360 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4361 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4362 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4363 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4364 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4365 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4366 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4367 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4368 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4371 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4372 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4373 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4374 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4375 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4376 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4377 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4378 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4379 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4380 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4381 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4382 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4383 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4384 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4385 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4388 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4389 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4390 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4391 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4392 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4393 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4394 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4395 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4396 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4397 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4398 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4399 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4400 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4401 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4402 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4405 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4406 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4407 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4408 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4409 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4410 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4411 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4412 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4413 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4414 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4415 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4416 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4417 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4418 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4419 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4422 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4423 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4424 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4425 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4426 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4427 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4428 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4429 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4430 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4431 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4432 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4433 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4434 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4435 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4436 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4437 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4438 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4439 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4440 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4441 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4442 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4443 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4444 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4445 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4446 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4447 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4448 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4449 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4450 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4451 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4452 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4453 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4454 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4455 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4458 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4460 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4463 /* only supported when RAS is enabled */
4464 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4467 r = amdgpu_ring_alloc(ring, 7);
4469 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4474 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4475 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
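/* DMA a fill pattern across the whole VMID0 GDS aperture so that every GDS
 * location gets written once; presumably this (re)initializes the EDC state,
 * given that this function is the GDS EDC workaround
 */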
4477 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4478 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4479 PACKET3_DMA_DATA_DST_SEL(1) |
4480 PACKET3_DMA_DATA_SRC_SEL(2) |
4481 PACKET3_DMA_DATA_ENGINE(0)));
4482 amdgpu_ring_write(ring, 0);
4483 amdgpu_ring_write(ring, 0);
4484 amdgpu_ring_write(ring, 0);
4485 amdgpu_ring_write(ring, 0);
4486 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4487 adev->gds.gds_size);
4489 amdgpu_ring_commit(ring);
4491 for (i = 0; i < adev->usec_timeout; i++) {
4492 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4497 if (i >= adev->usec_timeout)
4500 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4505 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4507 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4508 struct amdgpu_ib ib;
4509 struct dma_fence *f = NULL;
4511 unsigned total_size, vgpr_offset, sgpr_offset;
4514 int compute_dim_x = adev->gfx.config.max_shader_engines *
4515 adev->gfx.config.max_cu_per_sh *
4516 adev->gfx.config.max_sh_per_se;
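/* compute_dim_x = SEs * SHs-per-SE * CUs-per-SH, i.e. the total CU count;
 * the dispatches below scale their X dimension from this so the init shaders
 * can cover every CU
 */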
4517 int sgpr_work_group_size = 5;
4518 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4519 int vgpr_init_shader_size;
4520 const u32 *vgpr_init_shader_ptr;
4521 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4523 /* only supported when RAS is enabled */
4524 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4527 /* bail if the compute ring is not ready */
4528 if (!ring->sched.ready)
4531 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4532 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4533 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4534 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4536 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4537 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4538 vgpr_init_regs_ptr = vgpr_init_regs;
4542 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4544 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4546 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4547 total_size = ALIGN(total_size, 256);
4548 vgpr_offset = total_size;
4549 total_size += ALIGN(vgpr_init_shader_size, 256);
4550 sgpr_offset = total_size;
4551 total_size += sizeof(sgpr_init_compute_shader);
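/* IB layout: the packet stream comes first, then the VGPR init shader at a
 * 256-byte aligned vgpr_offset (COMPUTE_PGM_LO takes the address >> 8), then
 * the SGPR init shader at sgpr_offset
 */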
4553 /* allocate an indirect buffer to put the commands in */
4554 memset(&ib, 0, sizeof(ib));
4555 r = amdgpu_ib_get(adev, NULL, total_size,
4556 AMDGPU_IB_POOL_DIRECT, &ib);
4558 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4562 /* load the compute shaders */
4563 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4564 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4566 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4567 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4569 /* init the ib length to 0 */
4573 /* write the register state for the compute dispatch */
4574 for (i = 0; i < gpr_reg_size; i++) {
4575 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4576 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4577 - PACKET3_SET_SH_REG_START;
4578 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4580 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4581 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4582 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4583 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4584 - PACKET3_SET_SH_REG_START;
4585 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4586 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4588 /* write dispatch packet */
4589 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4590 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4591 ib.ptr[ib.length_dw++] = 1; /* y */
4592 ib.ptr[ib.length_dw++] = 1; /* z */
4593 ib.ptr[ib.length_dw++] =
4594 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4596 /* write CS partial flush packet */
4597 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4598 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4601 /* write the register state for the compute dispatch */
4602 for (i = 0; i < gpr_reg_size; i++) {
4603 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4604 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4605 - PACKET3_SET_SH_REG_START;
4606 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4608 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4609 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4610 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4611 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4612 - PACKET3_SET_SH_REG_START;
4613 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4614 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4616 /* write dispatch packet */
4617 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4618 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4619 ib.ptr[ib.length_dw++] = 1; /* y */
4620 ib.ptr[ib.length_dw++] = 1; /* z */
4621 ib.ptr[ib.length_dw++] =
4622 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4624 /* write CS partial flush packet */
4625 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4626 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4629 /* write the register state for the compute dispatch */
4630 for (i = 0; i < gpr_reg_size; i++) {
4631 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4632 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4633 - PACKET3_SET_SH_REG_START;
4634 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4636 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4637 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4638 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4639 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4640 - PACKET3_SET_SH_REG_START;
4641 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4642 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4644 /* write dispatch packet */
4645 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4646 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4647 ib.ptr[ib.length_dw++] = 1; /* y */
4648 ib.ptr[ib.length_dw++] = 1; /* z */
4649 ib.ptr[ib.length_dw++] =
4650 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4652 /* write CS partial flush packet */
4653 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4654 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4656 /* schedule the ib on the ring */
4657 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4659 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4663 /* wait for the GPU to finish processing the IB */
4664 r = dma_fence_wait(f, false);
4666 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4671 amdgpu_ib_free(adev, &ib, NULL);
4677 static int gfx_v9_0_early_init(void *handle)
4679 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4681 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4683 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4684 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4685 adev->gfx.num_gfx_rings = 0;
4687 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4688 adev->gfx.xcc_mask = 1;
4689 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4690 AMDGPU_MAX_COMPUTE_RINGS);
4691 gfx_v9_0_set_kiq_pm4_funcs(adev);
4692 gfx_v9_0_set_ring_funcs(adev);
4693 gfx_v9_0_set_irq_funcs(adev);
4694 gfx_v9_0_set_gds_init(adev);
4695 gfx_v9_0_set_rlc_funcs(adev);
4697 /* init rlcg reg access ctrl */
4698 gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4700 return gfx_v9_0_init_microcode(adev);
4703 static int gfx_v9_0_ecc_late_init(void *handle)
4705 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4709 * Temporary workaround for an issue where CP firmware fails to
4710 * update the read pointer while CPDMA writes the clearing operation
4711 * to GDS during the suspend/resume sequence on several cards. So just
4712 * limit this operation to the cold boot sequence.
4714 if ((!adev->in_suspend) &&
4715 (adev->gds.gds_size)) {
4716 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4721 /* requires IBs so do in late init after IB pool is initialized */
4722 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4723 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4725 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4730 if (adev->gfx.ras &&
4731 adev->gfx.ras->enable_watchdog_timer)
4732 adev->gfx.ras->enable_watchdog_timer(adev);
4737 static int gfx_v9_0_late_init(void *handle)
4739 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4742 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4746 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4750 r = gfx_v9_0_ecc_late_init(handle);
4754 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4755 gfx_v9_4_2_debug_trap_config_init(adev,
4756 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4758 gfx_v9_0_debug_trap_config_init(adev,
4759 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4764 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4766 uint32_t rlc_setting;
4768 /* if RLC is not enabled, do nothing */
4769 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4770 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4776 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4781 data = RLC_SAFE_MODE__CMD_MASK;
4782 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4783 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4785 /* wait for RLC_SAFE_MODE */
4786 for (i = 0; i < adev->usec_timeout; i++) {
4787 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4793 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4797 data = RLC_SAFE_MODE__CMD_MASK;
4798 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4801 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4804 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4806 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4807 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4808 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4809 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4811 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4812 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4813 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4816 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4819 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4822 /* TODO: double check whether we need to perform this under safe mode */
4823 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4825 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4826 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4828 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4830 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4831 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4833 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4835 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4838 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4843 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4845 /* It is disabled by HW by default */
4846 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4847 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4848 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4850 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4851 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4853 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4854 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4855 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4857 /* only for Vega10 & Raven1 */
4858 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4861 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4863 /* MGLS is a global flag to control all MGLS in GFX */
4864 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4865 /* 2 - RLC memory Light sleep */
4866 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4867 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4868 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4870 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4872 /* 3 - CP memory Light sleep */
4873 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4874 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4875 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4877 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4881 /* 1 - MGCG_OVERRIDE */
4882 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4884 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4885 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4887 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4888 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4889 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4890 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4893 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4895 /* 2 - disable MGLS in RLC */
4896 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4897 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4898 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4899 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4902 /* 3 - disable MGLS in CP */
4903 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4904 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4905 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4906 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4910 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4913 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4918 if (!adev->gfx.num_gfx_rings)
4921 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4923 /* Enable 3D CGCG/CGLS */
4925 /* write cmd to clear cgcg/cgls ov */
4926 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4927 /* unset CGCG override */
4928 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4929 /* update CGCG and CGLS override bits */
4931 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4933 /* enable 3D CGCG FSM (0x0000363f) */
4934 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4936 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4937 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4938 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4940 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4942 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4943 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4944 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4946 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4948 /* set IDLE_POLL_COUNT(0x00900100) */
4949 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4950 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4951 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4953 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4955 /* Disable CGCG/CGLS */
4956 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4957 /* disable cgcg, cgls should be disabled */
4958 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4959 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4960 /* disable cgcg and cgls in FSM */
4962 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4965 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
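/*
 * The 0x0000363f FSM value above appears to decompose into the 0x36 GFX
 * idle threshold, CGCG_EN, the 0xF CGLS repeater-compensation delay and
 * CGLS_EN; the 0x00900100 IDLE_POLL_COUNT value programs a 0x0090 idle
 * poll count with a 0x0100 poll frequency for the RB WPTR polling logic.
 */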
4968 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4973 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4975 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4976 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4977 /* unset CGCG override */
4978 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4979 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4980 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4982 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4983 /* update CGCG and CGLS override bits */
4985 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4987 /* enable cgcg FSM (0x0000363F) */
4988 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4990 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
4991 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4992 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4994 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4995 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4996 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4997 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4998 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5000 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5002 /* set IDLE_POLL_COUNT(0x00900100) */
5003 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5004 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5005 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5007 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5009 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5010 /* reset CGCG/CGLS bits */
5011 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5012 /* disable cgcg and cgls in FSM */
5014 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5017 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5020 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5024 /* CGCG/CGLS should be enabled after MGCG/MGLS
5025 * === MGCG + MGLS ===
5027 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5028 /* === CGCG /CGLS for GFX 3D Only === */
5029 gfx_v9_0_update_3d_clock_gating(adev, enable);
5030 /* === CGCG + CGLS === */
5031 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5033 /* CGCG/CGLS should be disabled before MGCG/MGLS
5034 * === CGCG + CGLS ===
5036 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5037 /* === CGCG /CGLS for GFX 3D Only === */
5038 gfx_v9_0_update_3d_clock_gating(adev, enable);
5039 /* === MGCG + MGLS === */
5040 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5045 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5050 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5051 if (amdgpu_sriov_is_pp_one_vf(adev))
5052 data = RREG32_NO_KIQ(reg);
5054 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5056 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5057 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5059 if (amdgpu_sriov_is_pp_one_vf(adev))
5060 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5062 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
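/*
 * Only the RLC_SPM_VMID field of RLC_SPM_MC_CNTL is updated; the rest of
 * the register is preserved.  Under SR-IOV with a single VF owning the PP
 * interface the register is accessed directly (NO_KIQ), presumably because
 * routing the access through the KIQ is unnecessary in that configuration.
 */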
5065 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5067 amdgpu_gfx_off_ctrl(adev, false);
5069 gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5071 amdgpu_gfx_off_ctrl(adev, true);
5074 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5076 struct soc15_reg_rlcg *entries, int arr_size)
5084 for (i = 0; i < arr_size; i++) {
5085 const struct soc15_reg_rlcg *entry;
5087 entry = &entries[i];
5088 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5096 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5098 return gfx_v9_0_check_rlcg_range(adev, offset,
5099 (void *)rlcg_access_gc_9_0,
5100 ARRAY_SIZE(rlcg_access_gc_9_0));
5103 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5104 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5105 .set_safe_mode = gfx_v9_0_set_safe_mode,
5106 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5107 .init = gfx_v9_0_rlc_init,
5108 .get_csb_size = gfx_v9_0_get_csb_size,
5109 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5110 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5111 .resume = gfx_v9_0_rlc_resume,
5112 .stop = gfx_v9_0_rlc_stop,
5113 .reset = gfx_v9_0_rlc_reset,
5114 .start = gfx_v9_0_rlc_start,
5115 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5116 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5119 static int gfx_v9_0_set_powergating_state(void *handle,
5120 enum amd_powergating_state state)
5122 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5123 bool enable = (state == AMD_PG_STATE_GATE);
5125 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5126 case IP_VERSION(9, 2, 2):
5127 case IP_VERSION(9, 1, 0):
5128 case IP_VERSION(9, 3, 0):
5130 amdgpu_gfx_off_ctrl(adev, false);
5132 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5133 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5134 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5136 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5137 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5140 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5141 gfx_v9_0_enable_cp_power_gating(adev, true);
5143 gfx_v9_0_enable_cp_power_gating(adev, false);
5145 /* update gfx cgpg state */
5146 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5148 /* update gfx mg power gating state */
5149 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5152 amdgpu_gfx_off_ctrl(adev, true);
5154 case IP_VERSION(9, 2, 1):
5155 amdgpu_gfx_off_ctrl(adev, enable);
5164 static int gfx_v9_0_set_clockgating_state(void *handle,
5165 enum amd_clockgating_state state)
5167 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5169 if (amdgpu_sriov_vf(adev))
5172 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5173 case IP_VERSION(9, 0, 1):
5174 case IP_VERSION(9, 2, 1):
5175 case IP_VERSION(9, 4, 0):
5176 case IP_VERSION(9, 2, 2):
5177 case IP_VERSION(9, 1, 0):
5178 case IP_VERSION(9, 4, 1):
5179 case IP_VERSION(9, 3, 0):
5180 case IP_VERSION(9, 4, 2):
5181 gfx_v9_0_update_gfx_clock_gating(adev,
5182 state == AMD_CG_STATE_GATE);
5190 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5192 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5195 if (amdgpu_sriov_vf(adev))
5198 /* AMD_CG_SUPPORT_GFX_MGCG */
5199 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5200 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5201 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5203 /* AMD_CG_SUPPORT_GFX_CGCG */
5204 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5205 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5206 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5208 /* AMD_CG_SUPPORT_GFX_CGLS */
5209 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5210 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5212 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5213 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5214 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5215 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5217 /* AMD_CG_SUPPORT_GFX_CP_LS */
5218 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5219 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5220 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5222 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5223 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5224 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5225 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5226 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5228 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5229 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5230 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5234 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5236 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5239 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5241 struct amdgpu_device *adev = ring->adev;
5244 /* XXX check if swapping is necessary on BE */
5245 if (ring->use_doorbell) {
5246 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5248 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5249 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5255 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5257 struct amdgpu_device *adev = ring->adev;
5259 if (ring->use_doorbell) {
5260 /* XXX check if swapping is necessary on BE */
5261 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5262 WDOORBELL64(ring->doorbell_index, ring->wptr);
5264 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5265 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5269 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5271 struct amdgpu_device *adev = ring->adev;
5272 u32 ref_and_mask, reg_mem_engine;
5273 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5275 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5278 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5281 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5288 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5289 reg_mem_engine = 1; /* pfp */
5292 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5293 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5294 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5295 ref_and_mask, ref_and_mask, 0x20);
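/*
 * The HDP flush is implemented as a WAIT_REG_MEM on the NBIO flush
 * request/done registers: the per-ring ref_and_mask bit selects which CP
 * client (the ME1/ME2 pipe for compute, CP0 for gfx) must observe the
 * flush completion before the ring is allowed to proceed.
 */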
5298 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5299 struct amdgpu_job *job,
5300 struct amdgpu_ib *ib,
5303 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5304 u32 header, control = 0;
5306 if (ib->flags & AMDGPU_IB_FLAG_CE)
5307 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5309 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5311 control |= ib->length_dw | (vmid << 24);
5313 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5314 control |= INDIRECT_BUFFER_PRE_ENB(1);
5316 if (flags & AMDGPU_IB_PREEMPTED)
5317 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5319 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5320 gfx_v9_0_ring_emit_de_meta(ring,
5321 (!amdgpu_sriov_vf(ring->adev) &&
5322 flags & AMDGPU_IB_PREEMPTED) ?
5324 job->gds_size > 0 && job->gds_base != 0);
5327 amdgpu_ring_write(ring, header);
5328 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5329 amdgpu_ring_write(ring,
5333 lower_32_bits(ib->gpu_addr));
5334 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5335 amdgpu_ring_ib_on_emit_cntl(ring);
5336 amdgpu_ring_write(ring, control);
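/*
 * The control dword packs the IB length in dwords with the VMID shifted to
 * bit 24; PRE_ENB/PRE_RESUME mark preemptible and resumed IBs.  For
 * preemptible DE IBs the de_meta emitted above presumably gives the CP the
 * location for saving/restoring draw state (and GDS when the job uses it).
 */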
5339 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5342 u32 control = ring->ring[offset];
5344 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5345 ring->ring[offset] = control;
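/*
 * The patch helpers below rewrite packets already emitted into the ring
 * when an IB is resumed after preemption: the saved CE/DE metadata is
 * copied back over the original WRITE_DATA payload, splitting the memcpy
 * when the payload wraps past the end of the ring buffer.
 */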
5348 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5351 struct amdgpu_device *adev = ring->adev;
5352 void *ce_payload_cpu_addr;
5353 uint64_t payload_offset, payload_size;
5355 payload_size = sizeof(struct v9_ce_ib_state);
5357 if (ring->is_mes_queue) {
5358 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5359 gfx[0].gfx_meta_data) +
5360 offsetof(struct v9_gfx_meta_data, ce_payload);
5361 ce_payload_cpu_addr =
5362 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5364 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5365 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5368 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5369 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5371 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5372 (ring->buf_mask + 1 - offset) << 2);
5373 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5374 memcpy((void *)&ring->ring[0],
5375 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5380 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5383 struct amdgpu_device *adev = ring->adev;
5384 void *de_payload_cpu_addr;
5385 uint64_t payload_offset, payload_size;
5387 payload_size = sizeof(struct v9_de_ib_state);
5389 if (ring->is_mes_queue) {
5390 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5391 gfx[0].gfx_meta_data) +
5392 offsetof(struct v9_gfx_meta_data, de_payload);
5393 de_payload_cpu_addr =
5394 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5396 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5397 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5400 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5401 IB_COMPLETION_STATUS_PREEMPTED;
5403 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5404 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5406 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5407 (ring->buf_mask + 1 - offset) << 2);
5408 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5409 memcpy((void *)&ring->ring[0],
5410 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5415 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5416 struct amdgpu_job *job,
5417 struct amdgpu_ib *ib,
5420 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5421 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5423 /* Currently there is a high likelihood of a wave ID mismatch
5424 * between ME and GDS, leading to a hw deadlock, because ME generates
5425 * different wave IDs than GDS expects. This happens randomly
5426 * when at least 5 compute pipes use GDS ordered append.
5427 * The wave IDs generated by ME are also wrong after suspend/resume.
5428 * Those are probably bugs somewhere else in the kernel driver.
5430 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5431 * GDS to 0 for this ring (me/pipe).
5433 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5434 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5435 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5436 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5439 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5440 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5441 amdgpu_ring_write(ring,
5445 lower_32_bits(ib->gpu_addr));
5446 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5447 amdgpu_ring_write(ring, control);
5450 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5451 u64 seq, unsigned flags)
5453 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5454 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5455 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5456 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5459 /* RELEASE_MEM - flush caches, send int */
5460 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5463 dw2 = EOP_TC_NC_ACTION_EN;
5465 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5466 EOP_TC_MD_ACTION_EN;
5468 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5473 amdgpu_ring_write(ring, dw2);
5474 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5477 * the address should be Qword aligned for a 64bit write, Dword
5478 * aligned if only the low 32 bits of data are sent (data high is discarded)
5484 amdgpu_ring_write(ring, lower_32_bits(addr));
5485 amdgpu_ring_write(ring, upper_32_bits(addr));
5486 amdgpu_ring_write(ring, lower_32_bits(seq));
5487 amdgpu_ring_write(ring, upper_32_bits(seq));
5488 amdgpu_ring_write(ring, 0);
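/*
 * The fence is emitted as a RELEASE_MEM event: dw2 selects which caches to
 * flush/invalidate (TC NC only for the TC_WB_ONLY case), DATA_SEL picks a
 * 32- or 64-bit seq write, and INT_SEL controls whether an interrupt is
 * raised once the write has landed.
 */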
5491 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5493 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5494 uint32_t seq = ring->fence_drv.sync_seq;
5495 uint64_t addr = ring->fence_drv.gpu_addr;
5497 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5498 lower_32_bits(addr), upper_32_bits(addr),
5499 seq, 0xffffffff, 4);
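/*
 * Pipeline sync stalls the PFP (gfx) or ME (compute) with a WAIT_REG_MEM
 * on the ring's own fence address until the latest sync_seq has been
 * written back, so earlier work is complete before the following VM flush
 * and IB execute.
 */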
5502 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5503 unsigned vmid, uint64_t pd_addr)
5505 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5507 /* compute doesn't have PFP */
5508 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5509 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5510 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5511 amdgpu_ring_write(ring, 0x0);
5515 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5517 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5520 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5524 /* XXX check if swapping is necessary on BE */
5525 if (ring->use_doorbell)
5526 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5532 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5534 struct amdgpu_device *adev = ring->adev;
5536 /* XXX check if swapping is necessary on BE */
5537 if (ring->use_doorbell) {
5538 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5539 WDOORBELL64(ring->doorbell_index, ring->wptr);
5541 BUG(); /* only DOORBELL method supported on gfx9 now */
5545 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5546 u64 seq, unsigned int flags)
5548 struct amdgpu_device *adev = ring->adev;
5550 /* we only allocate 32bit for each seq wb address */
5551 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5553 /* write fence seq to the "addr" */
5554 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5555 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5556 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5557 amdgpu_ring_write(ring, lower_32_bits(addr));
5558 amdgpu_ring_write(ring, upper_32_bits(addr));
5559 amdgpu_ring_write(ring, lower_32_bits(seq));
5561 if (flags & AMDGPU_FENCE_FLAG_INT) {
5562 /* set register to trigger INT */
5563 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5564 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5565 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5566 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5567 amdgpu_ring_write(ring, 0);
5568 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5572 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5574 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5575 amdgpu_ring_write(ring, 0);
5578 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5580 struct amdgpu_device *adev = ring->adev;
5581 struct v9_ce_ib_state ce_payload = {0};
5582 uint64_t offset, ce_payload_gpu_addr;
5583 void *ce_payload_cpu_addr;
5586 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5588 if (ring->is_mes_queue) {
5589 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5590 gfx[0].gfx_meta_data) +
5591 offsetof(struct v9_gfx_meta_data, ce_payload);
5592 ce_payload_gpu_addr =
5593 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5594 ce_payload_cpu_addr =
5595 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5597 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5598 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5599 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5602 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5603 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5604 WRITE_DATA_DST_SEL(8) |
5606 WRITE_DATA_CACHE_POLICY(0));
5607 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5608 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5610 amdgpu_ring_ib_on_emit_ce(ring);
5613 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5614 sizeof(ce_payload) >> 2);
5616 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5617 sizeof(ce_payload) >> 2);
5620 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5623 struct amdgpu_device *adev = ring->adev;
5624 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5625 struct amdgpu_ring *kiq_ring = &kiq->ring;
5626 unsigned long flags;
5628 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5631 spin_lock_irqsave(&kiq->ring_lock, flags);
5633 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5634 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5638 /* assert preemption condition */
5639 amdgpu_ring_set_preempt_cond_exec(ring, false);
5641 ring->trail_seq += 1;
5642 amdgpu_ring_alloc(ring, 13);
5643 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5644 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5646 /* assert IB preemption, emit the trailing fence */
5647 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5648 ring->trail_fence_gpu_addr,
5651 amdgpu_ring_commit(kiq_ring);
5652 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5654 /* poll the trailing fence */
5655 for (i = 0; i < adev->usec_timeout; i++) {
5656 if (ring->trail_seq ==
5657 le32_to_cpu(*ring->trail_fence_cpu_addr))
5662 if (i >= adev->usec_timeout) {
5664 DRM_WARN("ring %d: timeout waiting for ib preemption\n", ring->idx);
5667 /* reset CP_VMID_PREEMPT after the trailing fence */
5668 amdgpu_ring_emit_wreg(ring,
5669 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5671 amdgpu_ring_commit(ring);
5673 /* deassert preemption condition */
5674 amdgpu_ring_set_preempt_cond_exec(ring, true);
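/*
 * Preemption flow: clear the ring's COND_EXEC flag so pending packets are
 * skipped, emit a trailing fence, then ask the KIQ to preempt the queue
 * without unmapping it (PREEMPT_QUEUES_NO_UNMAP).  Completion is detected
 * by polling for the trailing fence; afterwards CP_VMID_PREEMPT is cleared
 * and the COND_EXEC condition is re-armed.
 */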
5678 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5680 struct amdgpu_device *adev = ring->adev;
5681 struct v9_de_ib_state de_payload = {0};
5682 uint64_t offset, gds_addr, de_payload_gpu_addr;
5683 void *de_payload_cpu_addr;
5686 if (ring->is_mes_queue) {
5687 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5688 gfx[0].gfx_meta_data) +
5689 offsetof(struct v9_gfx_meta_data, de_payload);
5690 de_payload_gpu_addr =
5691 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5692 de_payload_cpu_addr =
5693 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5695 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5696 gfx[0].gds_backup) +
5697 offsetof(struct v9_gfx_meta_data, de_payload);
5698 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5700 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5701 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5702 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5704 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5705 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5710 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5711 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5714 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5715 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5716 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5717 WRITE_DATA_DST_SEL(8) |
5719 WRITE_DATA_CACHE_POLICY(0));
5720 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5721 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5723 amdgpu_ring_ib_on_emit_de(ring);
5725 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5726 sizeof(de_payload) >> 2);
5728 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5729 sizeof(de_payload) >> 2);
5732 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5735 uint32_t v = secure ? FRAME_TMZ : 0;
5737 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5738 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5741 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5745 gfx_v9_0_ring_emit_ce_meta(ring,
5746 (!amdgpu_sriov_vf(ring->adev) &&
5747 flags & AMDGPU_IB_PREEMPTED) ? true : false);
5749 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5750 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5751 /* set load_global_config & load_global_uconfig */
5753 /* set load_cs_sh_regs */
5755 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5758 /* set load_ce_ram if a preamble is present */
5759 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5762 /* still load_ce_ram if this is the first time a preamble is presented,
5763 * even though no context switch happens.
5765 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5769 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5770 amdgpu_ring_write(ring, dw2);
5771 amdgpu_ring_write(ring, 0);
5774 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5778 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5779 amdgpu_ring_write(ring, lower_32_bits(addr));
5780 amdgpu_ring_write(ring, upper_32_bits(addr));
5781 /* discard following DWs if *cond_exec_gpu_addr==0 */
5782 amdgpu_ring_write(ring, 0);
5783 ret = ring->wptr & ring->buf_mask;
5784 /* patch dummy value later */
5785 amdgpu_ring_write(ring, 0);
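/*
 * COND_EXEC makes the CP skip the following N dwords when the value at
 * cond_exec_gpu_addr is zero.  The dword count is written as a dummy 0
 * here; the returned ring offset lets the caller patch in the real count
 * after the conditional section has been emitted.
 */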
5789 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5790 uint32_t reg_val_offs)
5792 struct amdgpu_device *adev = ring->adev;
5794 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5795 amdgpu_ring_write(ring, 0 | /* src: register */
5796 (5 << 8) | /* dst: memory */
5797 (1 << 20)); /* write confirm */
5798 amdgpu_ring_write(ring, reg);
5799 amdgpu_ring_write(ring, 0);
5800 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5802 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5806 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5811 switch (ring->funcs->type) {
5812 case AMDGPU_RING_TYPE_GFX:
5813 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5815 case AMDGPU_RING_TYPE_KIQ:
5816 cmd = (1 << 16); /* no inc addr */
5822 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5823 amdgpu_ring_write(ring, cmd);
5824 amdgpu_ring_write(ring, reg);
5825 amdgpu_ring_write(ring, 0);
5826 amdgpu_ring_write(ring, val);
5829 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5830 uint32_t val, uint32_t mask)
5832 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5835 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5836 uint32_t reg0, uint32_t reg1,
5837 uint32_t ref, uint32_t mask)
5839 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5840 struct amdgpu_device *adev = ring->adev;
5841 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5842 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5845 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5848 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5852 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5854 struct amdgpu_device *adev = ring->adev;
5857 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5858 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5859 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5860 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5861 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
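/*
 * Soft recovery signals the offending waves through SQ_CMD instead of
 * resetting the ring.  CHECK_VMID/VM_ID restrict the command to waves that
 * belong to the hung job's VMID; CMD 0x03 with MODE 0x01 is understood to
 * request a broadcast kill of those waves.
 */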
5864 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5865 enum amdgpu_interrupt_state state)
5868 case AMDGPU_IRQ_STATE_DISABLE:
5869 case AMDGPU_IRQ_STATE_ENABLE:
5870 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5871 TIME_STAMP_INT_ENABLE,
5872 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5879 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5881 enum amdgpu_interrupt_state state)
5883 u32 mec_int_cntl, mec_int_cntl_reg;
5886 * amdgpu controls only the first MEC. That's why this function only
5887 * handles the setting of interrupts for this specific MEC. All other
5888 * pipes' interrupts are set by amdkfd.
5894 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5897 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5900 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5903 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5906 DRM_DEBUG("invalid pipe %d\n", pipe);
5910 DRM_DEBUG("invalid me %d\n", me);
5915 case AMDGPU_IRQ_STATE_DISABLE:
5916 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5917 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5918 TIME_STAMP_INT_ENABLE, 0);
5919 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5921 case AMDGPU_IRQ_STATE_ENABLE:
5922 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5923 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5924 TIME_STAMP_INT_ENABLE, 1);
5925 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5932 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5933 struct amdgpu_irq_src *source,
5935 enum amdgpu_interrupt_state state)
5938 case AMDGPU_IRQ_STATE_DISABLE:
5939 case AMDGPU_IRQ_STATE_ENABLE:
5940 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5941 PRIV_REG_INT_ENABLE,
5942 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5951 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5952 struct amdgpu_irq_src *source,
5954 enum amdgpu_interrupt_state state)
5957 case AMDGPU_IRQ_STATE_DISABLE:
5958 case AMDGPU_IRQ_STATE_ENABLE:
5959 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5960 PRIV_INSTR_INT_ENABLE,
5961 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5970 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
5971 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5972 CP_ECC_ERROR_INT_ENABLE, 1)
5974 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
5975 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5976 CP_ECC_ERROR_INT_ENABLE, 0)
5978 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5979 struct amdgpu_irq_src *source,
5981 enum amdgpu_interrupt_state state)
5984 case AMDGPU_IRQ_STATE_DISABLE:
5985 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5986 CP_ECC_ERROR_INT_ENABLE, 0);
5987 DISABLE_ECC_ON_ME_PIPE(1, 0);
5988 DISABLE_ECC_ON_ME_PIPE(1, 1);
5989 DISABLE_ECC_ON_ME_PIPE(1, 2);
5990 DISABLE_ECC_ON_ME_PIPE(1, 3);
5993 case AMDGPU_IRQ_STATE_ENABLE:
5994 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5995 CP_ECC_ERROR_INT_ENABLE, 1);
5996 ENABLE_ECC_ON_ME_PIPE(1, 0);
5997 ENABLE_ECC_ON_ME_PIPE(1, 1);
5998 ENABLE_ECC_ON_ME_PIPE(1, 2);
5999 ENABLE_ECC_ON_ME_PIPE(1, 3);
6009 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6010 struct amdgpu_irq_src *src,
6012 enum amdgpu_interrupt_state state)
6015 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6016 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6018 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6019 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6021 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6022 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6024 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6025 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6027 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6028 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6030 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6031 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6033 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6034 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6036 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6037 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6039 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6040 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6048 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6049 struct amdgpu_irq_src *source,
6050 struct amdgpu_iv_entry *entry)
6053 u8 me_id, pipe_id, queue_id;
6054 struct amdgpu_ring *ring;
6056 DRM_DEBUG("IH: CP EOP\n");
6057 me_id = (entry->ring_id & 0x0c) >> 2;
6058 pipe_id = (entry->ring_id & 0x03) >> 0;
6059 queue_id = (entry->ring_id & 0x70) >> 4;
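/*
 * ring_id in the IV entry encodes the source queue: bits 3:2 give the ME
 * (0 appears to be gfx, 1-2 the MECs), bits 1:0 the pipe and bits 6:4 the
 * queue, which is used below to route the EOP to the matching ring's fence
 * processing.
 */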
6063 if (adev->gfx.num_gfx_rings) {
6064 if (!adev->gfx.mcbp) {
6065 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6066 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6067 /* Fence signals are handled on the software rings */
6068 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6069 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6075 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6076 ring = &adev->gfx.compute_ring[i];
6077 /* Per-queue EOP interrupts are reported for MEC starting from VI,
6078 * but the interrupt can only be enabled/disabled per pipe, not per queue.
6080 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6081 amdgpu_fence_process(ring);
6088 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6089 struct amdgpu_iv_entry *entry)
6091 u8 me_id, pipe_id, queue_id;
6092 struct amdgpu_ring *ring;
6095 me_id = (entry->ring_id & 0x0c) >> 2;
6096 pipe_id = (entry->ring_id & 0x03) >> 0;
6097 queue_id = (entry->ring_id & 0x70) >> 4;
6101 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6105 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6106 ring = &adev->gfx.compute_ring[i];
6107 if (ring->me == me_id && ring->pipe == pipe_id &&
6108 ring->queue == queue_id)
6109 drm_sched_fault(&ring->sched);
6115 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6116 struct amdgpu_irq_src *source,
6117 struct amdgpu_iv_entry *entry)
6119 DRM_ERROR("Illegal register access in command stream\n");
6120 gfx_v9_0_fault(adev, entry);
6124 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6125 struct amdgpu_irq_src *source,
6126 struct amdgpu_iv_entry *entry)
6128 DRM_ERROR("Illegal instruction in command stream\n");
6129 gfx_v9_0_fault(adev, entry);
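/*
 * Each entry in the table below names one EDC counter: the register that
 * holds it and the SEC (correctable) and DED (uncorrectable) bit-fields
 * inside that register.  Entries listing only a single field are
 * parity/SED-only structures with no DED count.
 */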
6134 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6135 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6136 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6137 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6139 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6140 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6141 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6143 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6144 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6147 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6148 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6151 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6152 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6153 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6155 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6156 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6159 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6160 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6161 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6163 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6164 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6165 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6167 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6168 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6171 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6172 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6175 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6176 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6179 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6180 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6181 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6183 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6184 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6187 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6188 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6189 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6191 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6192 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6193 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6194 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6196 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6197 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6198 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6201 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6202 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6203 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6204 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6206 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6207 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6208 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6209 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6211 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6212 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6213 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6214 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6216 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6217 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6218 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6219 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6221 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6222 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6225 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6226 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6227 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6229 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6230 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6233 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6234 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6237 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6238 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6241 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6242 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6245 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6246 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6249 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6250 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6253 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6254 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6255 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6257 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6258 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6259 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6261 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6262 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6263 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6265 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6266 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6267 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6269 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6270 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6271 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6273 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6274 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6277 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6278 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6281 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6282 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6285 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6286 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6289 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6290 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6293 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6294 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6297 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6298 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6301 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6302 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6305 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6306 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6309 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6310 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6313 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6314 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6317 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6318 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6321 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6322 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6325 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6326 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6329 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6330 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6331 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6333 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6334 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6335 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6337 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6338 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6341 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6342 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6345 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6346 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6349 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6350 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6351 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6353 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6354 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6355 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6357 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6358 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6359 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6361 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6362 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6363 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6365 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6366 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6369 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6370 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6371 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6373 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6374 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6375 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6377 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6378 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6379 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6381 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6382 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6383 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6385 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6386 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6387 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6389 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6390 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6391 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6393 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6394 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6395 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6397 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6398 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6399 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6401 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6402 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6403 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6405 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6406 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6407 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6409 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6410 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6411 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6413 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6414 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6415 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6417 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6418 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6419 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6421 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6422 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6423 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6425 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6426 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6427 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6429 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6430 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6431 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6433 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6434 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6435 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6437 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6438 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6441 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6442 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6445 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6446 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6449 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6450 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6453 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6454 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6457 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6458 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6459 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6461 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6462 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6463 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6465 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6466 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6467 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6469 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6470 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6471 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6473 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6474 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6475 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6477 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6478 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6481 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6482 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6485 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6486 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6489 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6490 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6493 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6494 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6497 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6498 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6499 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6501 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6502 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6503 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6505 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6506 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6507 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6509 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6510 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6511 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6513 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6514 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6515 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6517 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6518 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6521 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6522 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6525 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6526 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6529 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6530 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6533 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6534 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6537 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6538 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6539 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6541 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6542 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6543 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6545 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6546 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6547 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6549 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6550 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6553 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6554 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6557 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6558 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6561 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6562 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6565 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6566 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6569 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6570 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6575 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6576 void *inject_if, uint32_t instance_mask)
6578 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6580 struct ta_ras_trigger_error_input block_info = { 0 };
6582 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6585 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6588 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6591 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6593 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6594 ras_gfx_subblocks[info->head.sub_block_index].name,
6599 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6601 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6602 ras_gfx_subblocks[info->head.sub_block_index].name,
6607 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6608 block_info.sub_block_index =
6609 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6610 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6611 block_info.address = info->address;
6612 block_info.value = info->value;
6614 mutex_lock(&adev->grbm_idx_mutex);
6615 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6616 mutex_unlock(&adev->grbm_idx_mutex);
6621 static const char * const vml2_mems[] = {
6622 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6623 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6624 "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6625 "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6626 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6627 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6628 "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6629 "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6630 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6631 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6632 "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6633 "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6634 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6635 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6636 "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6637 "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6640 static const char * const vml2_walker_mems[] = {
6641 "UTC_VML2_CACHE_PDE0_MEM0",
6642 "UTC_VML2_CACHE_PDE0_MEM1",
6643 "UTC_VML2_CACHE_PDE1_MEM0",
6644 "UTC_VML2_CACHE_PDE1_MEM1",
6645 "UTC_VML2_CACHE_PDE2_MEM0",
6646 "UTC_VML2_CACHE_PDE2_MEM1",
6647 "UTC_VML2_RDIF_LOG_FIFO",
6650 static const char * const atc_l2_cache_2m_mems[] = {
6651 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6652 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6653 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6654 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6657 static const char * const atc_l2_cache_4k_mems[] = {
6658 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6659 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6660 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6661 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6662 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6663 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6664 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6665 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6666 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6667 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6668 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6669 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6670 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6671 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6672 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6673 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6674 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6675 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6676 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6677 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6678 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6679 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6680 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6681 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6682 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6683 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6684 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6685 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6686 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6687 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6688 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6689 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
					 struct ras_err_data *err_data)
{
	uint32_t i, data;
	uint32_t sec_count, ded_count;

	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				 "SEC %d\n", i, vml2_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				 "DED %d\n", i, vml2_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
					  SEC_COUNT);
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
					  DED_COUNT);
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				 "DED %d\n", i, vml2_walker_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);

		/* hard-coded field mask: SEC count in bits [14:13] */
		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
				 sec_count);
			err_data->ce_count += sec_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);

		/* SEC count in bits [14:13], DED count in bits [16:15] */
		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
				 sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = (data & 0x00018000L) >> 0xf;
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				 "DED %d\n", i, atc_l2_cache_4k_mems[i],
				 ded_count);
			err_data->ue_count += ded_count;
		}
	}

	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);

	return 0;
}
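
/*
 * Accounting convention used above and in the GFX EDC decode below: SEC
 * (single-error-corrected) counts are reported as correctable errors via
 * err_data->ce_count, while DED (double-error-detected) counts are reported
 * as uncorrectable errors via err_data->ue_count.
 */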
static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
	const struct soc15_reg_entry *reg,
	uint32_t se_id, uint32_t inst_id, uint32_t value,
	uint32_t *sec_count, uint32_t *ded_count)
{
	uint32_t i;
	uint32_t sec_cnt, ded_cnt;

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
		    gfx_v9_0_ras_fields[i].seg != reg->seg ||
		    gfx_v9_0_ras_fields[i].inst != reg->inst)
			continue;

		sec_cnt = (value &
			   gfx_v9_0_ras_fields[i].sec_count_mask) >>
			   gfx_v9_0_ras_fields[i].sec_count_shift;
		if (sec_cnt) {
			dev_info(adev->dev, "GFX SubBlock %s, "
				 "Instance[%d][%d], SEC %d\n",
				 gfx_v9_0_ras_fields[i].name,
				 se_id, inst_id,
				 sec_cnt);
			*sec_count += sec_cnt;
		}

		ded_cnt = (value &
			   gfx_v9_0_ras_fields[i].ded_count_mask) >>
			   gfx_v9_0_ras_fields[i].ded_count_shift;
		if (ded_cnt) {
			dev_info(adev->dev, "GFX SubBlock %s, "
				 "Instance[%d][%d], DED %d\n",
				 gfx_v9_0_ras_fields[i].name,
				 se_id, inst_id,
				 ded_cnt);
			*ded_count += ded_cnt;
		}
	}

	return 0;
}
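
/*
 * The EDC counters are cleared by reading them back, so the reset path below
 * simply selects every SE/SH/instance combination and reads each counter
 * register once.  Writing 0xe0000000 to mmGRBM_GFX_INDEX afterwards should
 * restore the SE/SH/instance broadcast selection (the three broadcast bits
 * at the top of that register).
 */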
6831 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return;
6838 /* read back registers to clear the counters */
6839 mutex_lock(&adev->grbm_idx_mutex);
6840 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6841 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6842 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6843 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
6844 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6848 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6849 mutex_unlock(&adev->grbm_idx_mutex);
6851 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6852 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6853 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6854 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6855 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6856 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6857 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6858 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6860 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6861 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6862 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6865 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6866 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6867 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6870 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6871 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6872 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6875 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6876 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6877 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6880 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6881 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6882 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}
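
/*
 * gfx_v9_0_query_ras_error_count() below walks the gfx_v9_0_edc_counter_regs
 * table for every SE/instance selected through the GRBM index, decodes the
 * per-field SEC/DED counts with gfx_v9_0_ras_error_count(), and then folds in
 * the UTC/ATC L2 status gathered by gfx_v9_0_query_utc_edc_status().
 */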
6886 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	uint32_t sec_count = 0, ded_count = 0;
	uint32_t i, j, k;
	uint32_t reg_value;
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return;
6897 err_data->ue_count = 0;
6898 err_data->ce_count = 0;
6900 mutex_lock(&adev->grbm_idx_mutex);
6902 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6903 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				if (reg_value)
					gfx_v9_0_ras_error_count(adev,
						&gfx_v9_0_edc_counter_regs[i],
						j, k, reg_value,
						&sec_count, &ded_count);
			}
		}
	}
6917 err_data->ce_count += sec_count;
6918 err_data->ue_count += ded_count;
6920 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6921 mutex_unlock(&adev->grbm_idx_mutex);
	gfx_v9_0_query_utc_edc_status(adev, err_data);
}
static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
6928 const unsigned int cp_coher_cntl =
6929 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6930 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6931 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6932 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6933 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6935 /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6936 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6937 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6938 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6939 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
6940 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6941 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
}
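
/*
 * The ACQUIRE_MEM packet above flushes/invalidates the shader I$, K$, TCL1
 * and TC caches for the full address range: CP_COHER_SIZE is 0xffffffff and
 * SIZE_HI is 0xffffff with CP_COHER_BASE/BASE_HI set to 0, and the CP
 * re-checks completion every 0x0A poll-interval units.
 */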
6945 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	uint32_t wcl_cs_reg;

	/* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6957 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6960 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6963 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6966 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6969 DRM_DEBUG("invalid pipe %d\n", pipe);
	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}
static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that limits
	 * the number of gfx waves.  Programming it to 0x1f (5 bits set) leaves
	 * gfx only around 25% of the GPU's resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6988 amdgpu_ring_emit_wreg(ring,
6989 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the first ME (CS pipes 0-3).
	 */
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
	}
}
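
/*
 * Rough arithmetic behind the "around 25%" above, assuming the 7-bit
 * SPI_WCL_PIPE_PERCENT_GFX multiplier scales linearly: 0x1f / 0x7f =
 * 31 / 127, i.e. roughly 24% of the wave slots for gfx, while the other
 * compute pipes are dropped to the minimum multiplier (0x1) by
 * gfx_v9_0_emit_wave_limit_cs().
 */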
7004 static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
7006 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7007 uint32_t i, j, k, reg, index = 0;
7008 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
	if (!adev->gfx.ip_dump_core)
		return;
7013 for (i = 0; i < reg_count; i++)
7014 drm_printf(p, "%-50s \t 0x%08x\n",
7015 gc_reg_list_9[i].reg_name,
7016 adev->gfx.ip_dump_core[i]);
7018 /* print compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;
7022 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7023 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7024 adev->gfx.mec.num_mec,
7025 adev->gfx.mec.num_pipe_per_mec,
7026 adev->gfx.mec.num_queue_per_pipe);
7028 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
				for (reg = 0; reg < reg_count; reg++) {
					drm_printf(p, "%-50s \t 0x%08x\n",
						   gc_cp_reg_list_9[reg].reg_name,
						   adev->gfx.ip_dump_compute_queues[index + reg]);
				}
				index += reg_count;
			}
		}
	}
}
7044 static void gfx_v9_ip_dump(void *handle)
7046 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7047 uint32_t i, j, k, reg, index = 0;
7048 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
	if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
		return;
7053 amdgpu_gfx_off_ctrl(adev, false);
7054 for (i = 0; i < reg_count; i++)
7055 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7056 amdgpu_gfx_off_ctrl(adev, true);
7058 /* dump compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;
7062 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7063 amdgpu_gfx_off_ctrl(adev, false);
7064 mutex_lock(&adev->srbm_mutex);
7065 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7066 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7067 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				/* ME0 is for GFX so start from 1 for CP */
				soc15_grbm_select(adev, 1 + i, j, k, 0, 0);

				for (reg = 0; reg < reg_count; reg++) {
					adev->gfx.ip_dump_compute_queues[index + reg] =
						RREG32(SOC15_REG_ENTRY_OFFSET(
							gc_cp_reg_list_9[reg]));
				}
				index += reg_count;
			}
		}
	}
7080 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7081 mutex_unlock(&adev->srbm_mutex);
	amdgpu_gfx_off_ctrl(adev, true);
}
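
/*
 * Layout of the compute-queue dump filled above: registers for one queue are
 * stored back to back (reg_count entries per queue), and queues are ordered
 * by (mec, pipe, queue) exactly as gfx_v9_ip_print() walks them, so both
 * functions agree on where adev->gfx.ip_dump_compute_queues[index + reg]
 * lives for a given queue.
 */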
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
7088 .early_init = gfx_v9_0_early_init,
7089 .late_init = gfx_v9_0_late_init,
7090 .sw_init = gfx_v9_0_sw_init,
7091 .sw_fini = gfx_v9_0_sw_fini,
7092 .hw_init = gfx_v9_0_hw_init,
7093 .hw_fini = gfx_v9_0_hw_fini,
7094 .suspend = gfx_v9_0_suspend,
7095 .resume = gfx_v9_0_resume,
7096 .is_idle = gfx_v9_0_is_idle,
7097 .wait_for_idle = gfx_v9_0_wait_for_idle,
7098 .soft_reset = gfx_v9_0_soft_reset,
7099 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
7100 .set_powergating_state = gfx_v9_0_set_powergating_state,
7101 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
7102 .dump_ip_state = gfx_v9_ip_dump,
	.print_ip_state = gfx_v9_ip_print,
};
7106 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7107 .type = AMDGPU_RING_TYPE_GFX,
7109 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7110 .support_64bit_ptrs = true,
7111 .secure_submission_supported = true,
7112 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7113 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7114 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7115 .emit_frame_size = /* totally 242 maximum if 16 IBs */
7117 7 + /* PIPELINE_SYNC */
7118 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7119 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7121 8 + /* FENCE for VM_FLUSH */
7122 20 + /* GDS switch */
7123 4 + /* double SWITCH_BUFFER,
7124 the first COND_EXEC jump to the place just
7125 prior to this double SWITCH_BUFFER */
7133 8 + 8 + /* FENCE x2 */
7134 2 + /* SWITCH_BUFFER */
7135 7, /* gfx_v9_0_emit_mem_sync */
7136 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7137 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7138 .emit_fence = gfx_v9_0_ring_emit_fence,
7139 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7140 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7141 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7142 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7143 .test_ring = gfx_v9_0_ring_test_ring,
7144 .insert_nop = amdgpu_ring_insert_nop,
7145 .pad_ib = amdgpu_ring_generic_pad_ib,
7146 .emit_switch_buffer = gfx_v9_ring_emit_sb,
7147 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7148 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7149 .preempt_ib = gfx_v9_0_ring_preempt_ib,
7150 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7151 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7152 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7153 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7154 .soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};
7158 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7159 .type = AMDGPU_RING_TYPE_GFX,
7161 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7162 .support_64bit_ptrs = true,
7163 .secure_submission_supported = true,
7164 .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7165 .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7166 .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7167 .emit_frame_size = /* totally 242 maximum if 16 IBs */
7169 7 + /* PIPELINE_SYNC */
7170 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7171 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7173 8 + /* FENCE for VM_FLUSH */
7174 20 + /* GDS switch */
7175 4 + /* double SWITCH_BUFFER,
7176 * the first COND_EXEC jump to the place just
7177 * prior to this double SWITCH_BUFFER
7186 8 + 8 + /* FENCE x2 */
7187 2 + /* SWITCH_BUFFER */
7188 7, /* gfx_v9_0_emit_mem_sync */
7189 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7190 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7191 .emit_fence = gfx_v9_0_ring_emit_fence,
7192 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7193 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7194 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7195 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7196 .test_ring = gfx_v9_0_ring_test_ring,
7197 .test_ib = gfx_v9_0_ring_test_ib,
7198 .insert_nop = amdgpu_sw_ring_insert_nop,
7199 .pad_ib = amdgpu_ring_generic_pad_ib,
7200 .emit_switch_buffer = gfx_v9_ring_emit_sb,
7201 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7202 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7203 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7204 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7205 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7206 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7207 .soft_recovery = gfx_v9_0_ring_soft_recovery,
7208 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7209 .patch_cntl = gfx_v9_0_ring_patch_cntl,
7210 .patch_de = gfx_v9_0_ring_patch_de_meta,
	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
};
7214 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7215 .type = AMDGPU_RING_TYPE_COMPUTE,
7217 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7218 .support_64bit_ptrs = true,
7219 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7220 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7221 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7223 20 + /* gfx_v9_0_ring_emit_gds_switch */
7224 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7225 5 + /* hdp invalidate */
7226 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7227 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7228 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7229 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7230 7 + /* gfx_v9_0_emit_mem_sync */
7231 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7232 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7233 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7234 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7235 .emit_fence = gfx_v9_0_ring_emit_fence,
7236 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7237 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7238 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7239 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7240 .test_ring = gfx_v9_0_ring_test_ring,
7241 .test_ib = gfx_v9_0_ring_test_ib,
7242 .insert_nop = amdgpu_ring_insert_nop,
7243 .pad_ib = amdgpu_ring_generic_pad_ib,
7244 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7245 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7246 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7247 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
};
7251 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7252 .type = AMDGPU_RING_TYPE_KIQ,
7254 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7255 .support_64bit_ptrs = true,
7256 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7257 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7258 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7260 20 + /* gfx_v9_0_ring_emit_gds_switch */
7261 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7262 5 + /* hdp invalidate */
7263 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7264 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7265 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7266 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7267 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7268 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7269 .test_ring = gfx_v9_0_ring_test_ring,
7270 .insert_nop = amdgpu_ring_insert_nop,
7271 .pad_ib = amdgpu_ring_generic_pad_ib,
7272 .emit_rreg = gfx_v9_0_ring_emit_rreg,
7273 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7274 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
7278 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7282 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7284 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7285 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7287 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7288 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7289 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7292 for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}
7296 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7297 .set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};
7301 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7302 .set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};
7306 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7307 .set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};
7311 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7312 .set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};
7317 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7319 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7320 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7322 adev->gfx.priv_reg_irq.num_types = 1;
7323 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7325 adev->gfx.priv_inst_irq.num_types = 1;
7326 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 2):
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}
7350 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
7353 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7354 case IP_VERSION(9, 0, 1):
7355 case IP_VERSION(9, 2, 1):
7356 case IP_VERSION(9, 4, 0):
		adev->gds.gds_size = 0x10000;
		break;
7359 case IP_VERSION(9, 2, 2):
7360 case IP_VERSION(9, 1, 0):
7361 case IP_VERSION(9, 4, 1):
		adev->gds.gds_size = 0x1000;
		break;
	case IP_VERSION(9, 4, 2):
		/* Aldebaran removed all GDS internal memory;
		 * only the GWS opcodes used in the kernel, such as barrier,
		 * are still supported.
		 */
		adev->gds.gds_size = 0;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}
7375 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7376 case IP_VERSION(9, 0, 1):
7377 case IP_VERSION(9, 4, 0):
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
7380 case IP_VERSION(9, 2, 1):
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
7383 case IP_VERSION(9, 2, 2):
7384 case IP_VERSION(9, 1, 0):
7385 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7386 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
7390 case IP_VERSION(9, 4, 1):
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
7393 case IP_VERSION(9, 4, 2):
		/* deprecated for Aldebaran, no usage at all */
		adev->gds.gds_compute_max_wave_id = 0;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}
7403 adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}
7407 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7415 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7416 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
7421 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7425 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7426 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7428 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7429 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7431 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
	return (~data) & mask;
}
7436 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7437 struct amdgpu_cu_info *cu_info)
7439 int i, j, k, counter, active_cu_number = 0;
7440 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7441 unsigned disable_masks[4 * 4];
	if (!adev || !cu_info)
		return -EINVAL;
7447 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7449 if (adev->gfx.config.max_shader_engines *
	    adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;
7453 amdgpu_gfx_parse_disable_cu(disable_masks,
7454 adev->gfx.config.max_shader_engines,
7455 adev->gfx.config.max_sh_per_se);
7457 mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7464 gfx_v9_0_set_user_cu_inactive_bitmap(
7465 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7466 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which suits Vega ASICs with their
			 * 4*2 SE/SH layout.  Arcturus, however, uses an 8*1
			 * SE/SH layout.  To minimize the impact, its entries are
			 * folded into the existing 4x4 array as follows:
			 *   SE4,SH0 --> bitmap[0][1]
			 *   SE5,SH0 --> bitmap[1][1]
			 *   SE6,SH0 --> bitmap[2][1]
			 *   SE7,SH0 --> bitmap[3][1]
			 */
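			/*
			 * Worked example of the folding above: SE4,SH0 means
			 * i = 4, j = 0, so the entry lands in
			 * bitmap[0][i % 4][j + i / 4] = bitmap[0][0][1],
			 * i.e. the "bitmap[0][1]" slot listed for SE4,SH0.
			 */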
7480 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7496 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7497 mutex_unlock(&adev->grbm_idx_mutex);
	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}
7506 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7508 .type = AMD_IP_BLOCK_TYPE_GFX,
	.funcs = &gfx_v9_0_ip_funcs,
};