/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"
#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
#define mmTCP_CHAN_STEER_0_ARCT				0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_1_ARCT				0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_2_ARCT				0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_3_ARCT				0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_4_ARCT				0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_5_ARCT				0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX		0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir			0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX	1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir			0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX	1
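
/*
 * Fine-grained RAS sub-block indices for the GFX block. These values appear
 * to mirror the PSP RAS TA interface, so each *_INDEX_START/*_INDEX_END pair
 * brackets the sub-blocks belonging to one hardware unit (CPC, CPF, CPG,
 * GDS, SQ, SQC, TA, TCA, TCC, TCP, TD, EA, UTC ...).
 */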
148 enum ta_ras_gfx_subblock {
150 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
151 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
152 TA_RAS_BLOCK__GFX_CPC_UCODE,
153 TA_RAS_BLOCK__GFX_DC_STATE_ME1,
154 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
155 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
156 TA_RAS_BLOCK__GFX_DC_STATE_ME2,
157 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
158 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
159 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
161 TA_RAS_BLOCK__GFX_CPF_INDEX_START,
162 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
163 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
164 TA_RAS_BLOCK__GFX_CPF_TAG,
165 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
167 TA_RAS_BLOCK__GFX_CPG_INDEX_START,
168 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
169 TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
170 TA_RAS_BLOCK__GFX_CPG_TAG,
171 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
173 TA_RAS_BLOCK__GFX_GDS_INDEX_START,
174 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
175 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
176 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
177 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
178 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
179 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
181 TA_RAS_BLOCK__GFX_SPI_SR_MEM,
183 TA_RAS_BLOCK__GFX_SQ_INDEX_START,
184 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
185 TA_RAS_BLOCK__GFX_SQ_LDS_D,
186 TA_RAS_BLOCK__GFX_SQ_LDS_I,
187 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
188 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
190 TA_RAS_BLOCK__GFX_SQC_INDEX_START,
192 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
193 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
194 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
195 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
196 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
197 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
198 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
199 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
200 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
201 TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
202 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
204 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
205 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
206 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
207 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
208 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
209 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
210 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
211 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
212 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
213 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
214 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
215 TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
216 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
218 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
219 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
220 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
221 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
222 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
223 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
224 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
225 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
226 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
227 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
228 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
229 TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
230 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
231 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
233 TA_RAS_BLOCK__GFX_TA_INDEX_START,
234 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
235 TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
236 TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
237 TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
238 TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
239 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
241 TA_RAS_BLOCK__GFX_TCA_INDEX_START,
242 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
243 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
244 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
245 /* TCC (5 sub-ranges)*/
246 TA_RAS_BLOCK__GFX_TCC_INDEX_START,
248 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
249 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
250 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
251 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
252 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
253 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
254 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
255 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
256 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
257 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
259 TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
260 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
261 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
262 TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
263 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
265 TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
266 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
267 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
268 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
269 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
270 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
271 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
272 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
273 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
274 TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
275 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
277 TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
278 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
279 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
280 TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
281 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
283 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
284 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
285 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
286 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
287 TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
288 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
289 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
291 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
293 TA_RAS_BLOCK__GFX_TCP_INDEX_START,
294 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
295 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
296 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
297 TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
298 TA_RAS_BLOCK__GFX_TCP_DB_RAM,
299 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
300 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
301 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
303 TA_RAS_BLOCK__GFX_TD_INDEX_START,
304 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
305 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
306 TA_RAS_BLOCK__GFX_TD_CS_FIFO,
307 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
308 /* EA (3 sub-ranges)*/
309 TA_RAS_BLOCK__GFX_EA_INDEX_START,
311 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
312 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
313 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
314 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
315 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
316 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
317 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
318 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
319 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
320 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
322 TA_RAS_BLOCK__GFX_EA_INDEX1_START,
323 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
324 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
325 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
326 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
327 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
328 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
329 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
330 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
332 TA_RAS_BLOCK__GFX_EA_INDEX2_START,
333 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
334 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
335 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
336 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
337 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
338 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
340 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
342 TA_RAS_BLOCK__UTC_VML2_WALKER,
343 /* UTC ATC L2 2MB cache*/
344 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
345 /* UTC ATC L2 4KB cache*/
346 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)	\
	[AMDGPU_RAS_BLOCK__##subblock] = {			\
		#subblock,					\
		TA_RAS_BLOCK__##subblock,			\
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),	\
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),	\
	}
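
/*
 * Each table entry below ties an AMDGPU RAS sub-block to its TA enum value
 * and packs the hardware- and software-supported error types into small bit
 * masks (macro arguments a-d and e-h respectively, one bit per error type).
 */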
365 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
366 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
367 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
368 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
369 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
370 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
371 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
376 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
377 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
378 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
379 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
380 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
381 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
382 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
384 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
386 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
387 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
388 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
389 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
390 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
391 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
392 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
393 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
395 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
397 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
399 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
401 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
403 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
405 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
407 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
409 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
411 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
413 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
415 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
417 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
419 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
421 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
423 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
425 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
427 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
429 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
431 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
433 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
435 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
437 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
439 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
441 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
442 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
447 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
449 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
451 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
453 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
457 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
459 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
460 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
461 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
462 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
463 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
464 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
465 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
467 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
468 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
469 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
470 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
472 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
475 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
477 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
479 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
481 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
482 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
483 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
484 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
485 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
486 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
487 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
488 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
489 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
490 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
491 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
510 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
511 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};
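
/*
 * "Golden" register settings: each SOC15_REG_GOLDEN_VALUE() entry names an
 * IP block, instance and register, an AND mask selecting the bits to update
 * and the value to OR in. The tables are applied as read-modify-write
 * sequences by soc15_program_register_sequence() during hw init.
 */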
515 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
523 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
524 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
525 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
526 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
532 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
533 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
534 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
535 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
539 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
545 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
546 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
547 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
548 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
549 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
550 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
554 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
555 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
556 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
557 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
561 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
563 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
569 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
570 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
571 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
572 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
573 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
576 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
584 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
588 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
589 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
590 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
591 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
595 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
597 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
598 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
599 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
600 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
601 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
604 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
608 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
609 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
610 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
611 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
612 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
615 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
622 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
623 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
624 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
625 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
631 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
632 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
633 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
634 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
638 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
640 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
641 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
643 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
645 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
646 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
647 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
648 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
649 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
650 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
654 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
661 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
671 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
672 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
673 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
681 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
693 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
694 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
695 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
698 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
706 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
707 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
708 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
709 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
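
/*
 * KIQ (Kernel Interface Queue) helpers: each of the functions below emits a
 * single PM4 packet on the KIQ ring to set resources, map/unmap compute
 * queues, query queue status or invalidate TLBs on behalf of the driver.
 */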
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0* queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /*queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
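
/*
 * Small PM4 helpers used by the ring tests and register programming paths:
 * WRITE_DATA writes one dword to a register or memory location, and
 * WAIT_REG_MEM polls a register or memory location until it matches the
 * supplied reference value under the given mask.
 */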
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
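
/*
 * Basic ring test: write a magic value to a scratch register through the
 * ring and poll until it reads back, which proves the CP is fetching and
 * executing packets from the ring buffer.
 */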
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}
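
/*
 * Indirect buffer (IB) test: same idea as the ring test, but the magic value
 * is written by an IB to a writeback slot in system memory, so it also
 * exercises IB fetch and the fence wait path.
 */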
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
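
/*
 * me/mec_fw_write_wait indicate that the CP firmware is new enough to handle
 * combined register write-then-wait operations; firmware older than the
 * per-ASIC thresholds below gets a one-time warning and a fallback path.
 */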
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	     (adev->gfx.mec_feature_version < 46) ||
	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
	     (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 2, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 4, 0):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 2):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
		break;
	}
}
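
/*
 * Some boards are known to misbehave with GFXOFF enabled; they are matched
 * below by PCI vendor/device, subsystem IDs and ASIC revision so that GFXOFF
 * can be disabled for them in gfx_v9_0_check_if_need_gfxoff().
 */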
struct amdgpu_gfxoff_quirk {
	u16 chip_vendor;
	u16 chip_device;
	u16 subsys_vendor;
	u16 subsys_device;
	u8 revision;
};

static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
	{ 0, 0, 0, 0, 0 },
};
static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
{
	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;

	while (p && p->chip_device != 0) {
		if (pdev->vendor == p->chip_vendor &&
		    pdev->device == p->chip_device &&
		    pdev->subsystem_vendor == p->subsys_vendor &&
		    pdev->subsystem_device == p->subsys_device &&
		    pdev->revision == p->revision) {
			return true;
		}
		++p;
	}
	return false;
}

static bool is_raven_kicker(struct amdgpu_device *adev)
{
	if (adev->pm.fw_version >= 0x41e2b)
		return true;
	else
		return false;
}

static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
{
	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
	    (adev->gfx.me_fw_version >= 0x000000a5) &&
	    (adev->gfx.me_feature_version >= 52))
		return true;
	else
		return false;
}
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
		    ((!is_raven_kicker(adev) &&
		      adev->gfx.rlc_fw_version < 531) ||
		     (adev->gfx.rlc_feature_version < 1) ||
		     !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	case IP_VERSION(9, 3, 0):
		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}
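
/*
 * CP (PFP/ME/CE) firmware is loaded from amdgpu/<chip>_<block>.bin; the
 * headers are parsed for version information and, when the PSP front-door
 * loading path is used, the images are registered in adev->firmware.ucode.
 */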
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}
1349 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1350 const char *chip_name)
1354 struct amdgpu_firmware_info *info = NULL;
1355 const struct common_firmware_header *header = NULL;
1356 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1357 unsigned int *tmp = NULL;
1359 uint16_t version_major;
1360 uint16_t version_minor;
1361 uint32_t smu_version;
1364 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1365 * instead of picasso_rlc.bin.
1367 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1368 * or revision >= 0xD8 && revision <= 0xDF
1369 * otherwise is PCO FP5
1371 if (!strcmp(chip_name, "picasso") &&
1372 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1373 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1374 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1375 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1376 (smu_version >= 0x41e2b))
1378 * SMC is loaded by the SBIOS on APUs, so the SMU version can be queried directly.
1380 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1382 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1383 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1386 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1387 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1389 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1390 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1391 if (version_major == 2 && version_minor == 1)
1392 adev->gfx.rlc.is_rlc_v2_1 = true;
1394 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1395 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1396 adev->gfx.rlc.save_and_restore_offset =
1397 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1398 adev->gfx.rlc.clear_state_descriptor_offset =
1399 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1400 adev->gfx.rlc.avail_scratch_ram_locations =
1401 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1402 adev->gfx.rlc.reg_restore_list_size =
1403 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1404 adev->gfx.rlc.reg_list_format_start =
1405 le32_to_cpu(rlc_hdr->reg_list_format_start);
1406 adev->gfx.rlc.reg_list_format_separate_start =
1407 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1408 adev->gfx.rlc.starting_offsets_start =
1409 le32_to_cpu(rlc_hdr->starting_offsets_start);
1410 adev->gfx.rlc.reg_list_format_size_bytes =
1411 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1412 adev->gfx.rlc.reg_list_size_bytes =
1413 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1414 adev->gfx.rlc.register_list_format =
1415 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1416 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1417 if (!adev->gfx.rlc.register_list_format) {
1422 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1423 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1424 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1425 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1427 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1429 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1430 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1431 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1432 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1434 if (adev->gfx.rlc.is_rlc_v2_1)
1435 gfx_v9_0_init_rlc_ext_microcode(adev);
1437 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1438 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1439 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1440 info->fw = adev->gfx.rlc_fw;
1441 header = (const struct common_firmware_header *)info->fw->data;
1442 adev->firmware.fw_size +=
1443 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1445 if (adev->gfx.rlc.is_rlc_v2_1 &&
1446 adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1447 adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1448 adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1449 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1450 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1451 info->fw = adev->gfx.rlc_fw;
1452 adev->firmware.fw_size +=
1453 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1455 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1456 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1457 info->fw = adev->gfx.rlc_fw;
1458 adev->firmware.fw_size +=
1459 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1461 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1462 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1463 info->fw = adev->gfx.rlc_fw;
1464 adev->firmware.fw_size +=
1465 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1472 "gfx9: Failed to load firmware \"%s\"\n",
1474 release_firmware(adev->gfx.rlc_fw);
1475 adev->gfx.rlc_fw = NULL;
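/*
 * Decide whether a dedicated mec2 firmware binary should be requested
 * for this ASIC, keyed off the GC IP version.
 */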
1480 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1482 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1483 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1484 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
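/*
 * gfx_v9_0_init_cp_compute_microcode - fetch the MEC compute firmware
 *
 * Requests and validates <chip>_mec.bin (and <chip>_mec2.bin where a
 * separate image is supported), records the ucode/feature versions and
 * registers the MEC/MEC2 ucode and jump-table entries for PSP loading.
 */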
1490 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1491 const char *chip_name)
1495 struct amdgpu_firmware_info *info = NULL;
1496 const struct common_firmware_header *header = NULL;
1497 const struct gfx_firmware_header_v1_0 *cp_hdr;
1499 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1500 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1503 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1506 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1507 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1508 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1511 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1512 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1513 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1515 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1518 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1519 adev->gfx.mec2_fw->data;
1520 adev->gfx.mec2_fw_version =
1521 le32_to_cpu(cp_hdr->header.ucode_version);
1522 adev->gfx.mec2_feature_version =
1523 le32_to_cpu(cp_hdr->ucode_feature_version);
1526 adev->gfx.mec2_fw = NULL;
1529 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1530 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1533 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1534 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1535 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1536 info->fw = adev->gfx.mec_fw;
1537 header = (const struct common_firmware_header *)info->fw->data;
1538 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1539 adev->firmware.fw_size +=
1540 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1542 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1543 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1544 info->fw = adev->gfx.mec_fw;
1545 adev->firmware.fw_size +=
1546 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1548 if (adev->gfx.mec2_fw) {
1549 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1550 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1551 info->fw = adev->gfx.mec2_fw;
1552 header = (const struct common_firmware_header *)info->fw->data;
1553 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1554 adev->firmware.fw_size +=
1555 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1557 /* TODO: Determine if MEC2 JT FW loading can be removed
1558 * for all GFX v9 ASICs and above */
1559 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1560 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1561 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1562 info->fw = adev->gfx.mec2_fw;
1563 adev->firmware.fw_size +=
1564 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1571 gfx_v9_0_check_if_need_gfxoff(adev);
1572 gfx_v9_0_check_fw_write_wait(adev);
1575 "gfx9: Failed to load firmware \"%s\"\n",
1577 release_firmware(adev->gfx.mec_fw);
1578 adev->gfx.mec_fw = NULL;
1579 release_firmware(adev->gfx.mec2_fw);
1580 adev->gfx.mec2_fw = NULL;
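/*
 * gfx_v9_0_init_microcode - top-level firmware init
 *
 * Picks the firmware name prefix from the GC IP version and APU flags,
 * then loads the CP gfx microcode (when gfx rings exist), the RLC
 * microcode and the CP compute microcode.
 */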
1585 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1587 const char *chip_name;
1592 switch (adev->ip_versions[GC_HWIP][0]) {
1593 case IP_VERSION(9, 0, 1):
1594 chip_name = "vega10";
1596 case IP_VERSION(9, 2, 1):
1597 chip_name = "vega12";
1599 case IP_VERSION(9, 4, 0):
1600 chip_name = "vega20";
1602 case IP_VERSION(9, 2, 2):
1603 case IP_VERSION(9, 1, 0):
1604 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1605 chip_name = "raven2";
1606 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1607 chip_name = "picasso";
1609 chip_name = "raven";
1611 case IP_VERSION(9, 4, 1):
1612 chip_name = "arcturus";
1614 case IP_VERSION(9, 3, 0):
1615 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1616 chip_name = "renoir";
1618 chip_name = "green_sardine";
1620 case IP_VERSION(9, 4, 2):
1621 chip_name = "aldebaran";
1627 /* No CPG in Arcturus */
1628 if (adev->gfx.num_gfx_rings) {
1629 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1634 r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1638 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1645 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1648 const struct cs_section_def *sect = NULL;
1649 const struct cs_extent_def *ext = NULL;
1651 /* begin clear state */
1653 /* context control state */
1656 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1657 for (ext = sect->section; ext->extent != NULL; ++ext) {
1658 if (sect->id == SECT_CONTEXT)
1659 count += 2 + ext->reg_count;
1665 /* end clear state */
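/*
 * gfx_v9_0_get_csb_buffer - emit the clear-state indirect buffer
 *
 * Writes the clear-state preamble, the context-control packet, every
 * SECT_CONTEXT extent from the cs_data tables and the trailing
 * CLEAR_STATE packet into @buffer as little-endian PM4 dwords,
 * matching the layout counted by gfx_v9_0_get_csb_size().
 */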
1673 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1674 volatile u32 *buffer)
1677 const struct cs_section_def *sect = NULL;
1678 const struct cs_extent_def *ext = NULL;
1680 if (adev->gfx.rlc.cs_data == NULL)
1685 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1686 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1688 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1689 buffer[count++] = cpu_to_le32(0x80000000);
1690 buffer[count++] = cpu_to_le32(0x80000000);
1692 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1693 for (ext = sect->section; ext->extent != NULL; ++ext) {
1694 if (sect->id == SECT_CONTEXT) {
1696 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1697 buffer[count++] = cpu_to_le32(ext->reg_index -
1698 PACKET3_SET_CONTEXT_REG_START);
1699 for (i = 0; i < ext->reg_count; i++)
1700 buffer[count++] = cpu_to_le32(ext->extent[i]);
1707 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1708 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1710 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1711 buffer[count++] = cpu_to_le32(0);
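/*
 * gfx_v9_0_init_always_on_cu_mask - program the RLC always-on CU masks
 *
 * Walks every SE/SH, builds a bitmap of the first always-on CUs
 * (2 CUs for power gating; 4/8/12 CUs in total depending on the ASIC),
 * writes it to RLC_PG_ALWAYS_ON_CU_MASK / RLC_LB_ALWAYS_ACTIVE_CU_MASK
 * and caches it in cu_info->ao_cu_bitmap.
 */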
1714 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1716 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1717 uint32_t pg_always_on_cu_num = 2;
1718 uint32_t always_on_cu_num;
1720 uint32_t mask, cu_bitmap, counter;
1722 if (adev->flags & AMD_IS_APU)
1723 always_on_cu_num = 4;
1724 else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1725 always_on_cu_num = 8;
1727 always_on_cu_num = 12;
1729 mutex_lock(&adev->grbm_idx_mutex);
1730 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1731 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1735 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1737 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1738 if (cu_info->bitmap[i][j] & mask) {
1739 if (counter == pg_always_on_cu_num)
1740 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1741 if (counter < always_on_cu_num)
1750 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1751 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1754 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1755 mutex_unlock(&adev->grbm_idx_mutex);
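/*
 * gfx_v9_0_init_lbpw - program the RLC load-balancing (LBPW) registers
 *
 * Raven-class values for the RLC_LB_THR_CONFIG thresholds, counters and
 * RLC_LB_CNTL, followed by the always-on CU mask programming.
 * gfx_v9_4_init_lbpw() below does the same with Vega20 tuning.
 */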
1758 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1762 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1763 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1764 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1765 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1766 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1768 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1769 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1771 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1772 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1774 mutex_lock(&adev->grbm_idx_mutex);
1775 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1776 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1777 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1779 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1780 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1781 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1782 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1783 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1785 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1786 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1789 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1792 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1793 * programmed in gfx_v9_0_init_always_on_cu_mask()
1796 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1797 * but is used here for RLC_LB_CNTL configuration */
1798 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1799 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1800 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1801 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1802 mutex_unlock(&adev->grbm_idx_mutex);
1804 gfx_v9_0_init_always_on_cu_mask(adev);
1807 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1811 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1812 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1813 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1814 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1815 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1817 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1818 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1820 /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1821 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1823 mutex_lock(&adev->grbm_idx_mutex);
1824 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1825 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1826 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1828 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1829 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1830 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1831 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1832 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1834 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1835 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1838 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1841 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1842 * programmed in gfx_v9_0_init_always_on_cu_mask()
1845 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1846 * but is used here for RLC_LB_CNTL configuration */
1847 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1848 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1849 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1850 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1851 mutex_unlock(&adev->grbm_idx_mutex);
1853 gfx_v9_0_init_always_on_cu_mask(adev);
1856 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1858 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1861 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1863 if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1869 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1871 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1873 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1874 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1875 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1876 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1877 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1878 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1879 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1880 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1881 adev->gfx.rlc.rlcg_reg_access_supported = true;
1884 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1886 const struct cs_section_def *cs_data;
1889 adev->gfx.rlc.cs_data = gfx9_cs_data;
1891 cs_data = adev->gfx.rlc.cs_data;
1894 /* init clear state block */
1895 r = amdgpu_gfx_rlc_init_csb(adev);
1900 if (adev->flags & AMD_IS_APU) {
1901 /* TODO: double check the cp_table_size for RV */
1902 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1903 r = amdgpu_gfx_rlc_init_cpt(adev);
1908 switch (adev->ip_versions[GC_HWIP][0]) {
1909 case IP_VERSION(9, 2, 2):
1910 case IP_VERSION(9, 1, 0):
1911 gfx_v9_0_init_lbpw(adev);
1913 case IP_VERSION(9, 4, 0):
1914 gfx_v9_4_init_lbpw(adev);
1920 /* init spm vmid with 0xf */
1921 if (adev->gfx.rlc.funcs->update_spm_vmid)
1922 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1927 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1929 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1930 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
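/*
 * gfx_v9_0_mec_init - allocate MEC buffers
 *
 * Allocates the HPD/EOP buffer in VRAM (GFX9_MEC_HPD_SIZE bytes per
 * acquired compute ring) and a GTT BO holding a copy of the MEC
 * firmware image for the CP to fetch from.
 */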
1933 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1937 const __le32 *fw_data;
1940 size_t mec_hpd_size;
1942 const struct gfx_firmware_header_v1_0 *mec_hdr;
1944 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1946 /* take ownership of the relevant compute queues */
1947 amdgpu_gfx_compute_queue_acquire(adev);
1948 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1950 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1951 AMDGPU_GEM_DOMAIN_VRAM,
1952 &adev->gfx.mec.hpd_eop_obj,
1953 &adev->gfx.mec.hpd_eop_gpu_addr,
1956 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1957 gfx_v9_0_mec_fini(adev);
1961 memset(hpd, 0, mec_hpd_size);
1963 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1964 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1967 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1969 fw_data = (const __le32 *)
1970 (adev->gfx.mec_fw->data +
1971 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1972 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1974 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1975 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1976 &adev->gfx.mec.mec_fw_obj,
1977 &adev->gfx.mec.mec_fw_gpu_addr,
1980 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1981 gfx_v9_0_mec_fini(adev);
1985 memcpy(fw, fw_data, fw_size);
1987 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1988 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
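/*
 * SQ indirect register access helpers: select a wave/SIMD (and
 * optionally a thread, with auto-increment) through SQ_IND_INDEX and
 * read the requested words back from SQ_IND_DATA. Used by the wave
 * debug callbacks below.
 */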
1993 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1995 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1996 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1997 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1998 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1999 (SQ_IND_INDEX__FORCE_READ_MASK));
2000 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2003 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2004 uint32_t wave, uint32_t thread,
2005 uint32_t regno, uint32_t num, uint32_t *out)
2007 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2008 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2009 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2010 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2011 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2012 (SQ_IND_INDEX__FORCE_READ_MASK) |
2013 (SQ_IND_INDEX__AUTO_INCR_MASK));
2015 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2018 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2020 /* type 1 wave data */
2021 dst[(*no_fields)++] = 1;
2022 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2023 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2024 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2025 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2026 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2027 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2028 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2029 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2030 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2031 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2032 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2033 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2034 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2035 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2036 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2039 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2040 uint32_t wave, uint32_t start,
2041 uint32_t size, uint32_t *dst)
2044 adev, simd, wave, 0,
2045 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2048 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2049 uint32_t wave, uint32_t thread,
2050 uint32_t start, uint32_t size,
2054 adev, simd, wave, thread,
2055 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2058 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2059 u32 me, u32 pipe, u32 q, u32 vm)
2061 soc15_grbm_select(adev, me, pipe, q, vm);
2064 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2065 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2066 .select_se_sh = &gfx_v9_0_select_se_sh,
2067 .read_wave_data = &gfx_v9_0_read_wave_data,
2068 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2069 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2070 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2073 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
2074 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2075 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2076 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2079 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2081 .hw_ops = &gfx_v9_0_ras_ops,
2085 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2090 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2092 switch (adev->ip_versions[GC_HWIP][0]) {
2093 case IP_VERSION(9, 0, 1):
2094 adev->gfx.config.max_hw_contexts = 8;
2095 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2096 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2097 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2098 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2099 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2101 case IP_VERSION(9, 2, 1):
2102 adev->gfx.config.max_hw_contexts = 8;
2103 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2104 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2105 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2106 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2107 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2108 DRM_INFO("fix gfx.config for vega12\n");
2110 case IP_VERSION(9, 4, 0):
2111 adev->gfx.ras = &gfx_v9_0_ras;
2112 adev->gfx.config.max_hw_contexts = 8;
2113 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2114 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2115 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2116 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2117 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2118 gb_addr_config &= ~0xf3e777ff;
2119 gb_addr_config |= 0x22014042;
2120 /* check vbios table if gpu info is not available */
2121 err = amdgpu_atomfirmware_get_gfx_info(adev);
2125 case IP_VERSION(9, 2, 2):
2126 case IP_VERSION(9, 1, 0):
2127 adev->gfx.config.max_hw_contexts = 8;
2128 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2129 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2130 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2131 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2132 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2133 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2135 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2137 case IP_VERSION(9, 4, 1):
2138 adev->gfx.ras = &gfx_v9_4_ras;
2139 adev->gfx.config.max_hw_contexts = 8;
2140 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2141 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2142 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2143 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2144 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2145 gb_addr_config &= ~0xf3e777ff;
2146 gb_addr_config |= 0x22014042;
2148 case IP_VERSION(9, 3, 0):
2149 adev->gfx.config.max_hw_contexts = 8;
2150 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2151 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2152 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2153 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2154 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2155 gb_addr_config &= ~0xf3e777ff;
2156 gb_addr_config |= 0x22010042;
2158 case IP_VERSION(9, 4, 2):
2159 adev->gfx.ras = &gfx_v9_4_2_ras;
2160 adev->gfx.config.max_hw_contexts = 8;
2161 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2162 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2163 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2164 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2165 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2166 gb_addr_config &= ~0xf3e777ff;
2167 gb_addr_config |= 0x22014042;
2168 /* check vbios table if gpu info is not available */
2169 err = amdgpu_atomfirmware_get_gfx_info(adev);
2178 if (adev->gfx.ras) {
2179 err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2181 DRM_ERROR("Failed to register gfx ras block!\n");
2185 strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2186 adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2187 adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2188 adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2190 /* If no special ras_late_init function is defined, use the default gfx ras_late_init */
2191 if (!adev->gfx.ras->ras_block.ras_late_init)
2192 adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2194 /* If no special ras_cb function is defined, use the default ras_cb */
2195 if (!adev->gfx.ras->ras_block.ras_cb)
2196 adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2199 adev->gfx.config.gb_addr_config = gb_addr_config;
2201 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2203 adev->gfx.config.gb_addr_config,
2207 adev->gfx.config.max_tile_pipes =
2208 adev->gfx.config.gb_addr_config_fields.num_pipes;
2210 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2212 adev->gfx.config.gb_addr_config,
2215 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2217 adev->gfx.config.gb_addr_config,
2219 MAX_COMPRESSED_FRAGS);
2220 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2222 adev->gfx.config.gb_addr_config,
2225 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2227 adev->gfx.config.gb_addr_config,
2229 NUM_SHADER_ENGINES);
2230 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2232 adev->gfx.config.gb_addr_config,
2234 PIPE_INTERLEAVE_SIZE));
2239 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2240 int mec, int pipe, int queue)
2243 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2244 unsigned int hw_prio;
2246 ring = &adev->gfx.compute_ring[ring_id];
2251 ring->queue = queue;
2253 ring->ring_obj = NULL;
2254 ring->use_doorbell = true;
2255 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2256 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2257 + (ring_id * GFX9_MEC_HPD_SIZE);
2258 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2260 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2261 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2263 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2264 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2265 /* type-2 packets are deprecated on MEC, use type-3 instead */
2266 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2270 static int gfx_v9_0_sw_init(void *handle)
2272 int i, j, k, r, ring_id;
2273 struct amdgpu_ring *ring;
2274 struct amdgpu_kiq *kiq;
2275 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2277 switch (adev->ip_versions[GC_HWIP][0]) {
2278 case IP_VERSION(9, 0, 1):
2279 case IP_VERSION(9, 2, 1):
2280 case IP_VERSION(9, 4, 0):
2281 case IP_VERSION(9, 2, 2):
2282 case IP_VERSION(9, 1, 0):
2283 case IP_VERSION(9, 4, 1):
2284 case IP_VERSION(9, 3, 0):
2285 case IP_VERSION(9, 4, 2):
2286 adev->gfx.mec.num_mec = 2;
2289 adev->gfx.mec.num_mec = 1;
2293 adev->gfx.mec.num_pipe_per_mec = 4;
2294 adev->gfx.mec.num_queue_per_pipe = 8;
2297 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2301 /* Privileged reg */
2302 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2303 &adev->gfx.priv_reg_irq);
2307 /* Privileged inst */
2308 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2309 &adev->gfx.priv_inst_irq);
2314 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2315 &adev->gfx.cp_ecc_error_irq);
2320 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2321 &adev->gfx.cp_ecc_error_irq);
2325 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2327 r = gfx_v9_0_init_microcode(adev);
2329 DRM_ERROR("Failed to load gfx firmware!\n");
2333 if (adev->gfx.rlc.funcs) {
2334 if (adev->gfx.rlc.funcs->init) {
2335 r = adev->gfx.rlc.funcs->init(adev);
2337 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2343 r = gfx_v9_0_mec_init(adev);
2345 DRM_ERROR("Failed to init MEC BOs!\n");
2349 /* set up the gfx ring */
2350 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2351 ring = &adev->gfx.gfx_ring[i];
2352 ring->ring_obj = NULL;
2354 sprintf(ring->name, "gfx");
2356 sprintf(ring->name, "gfx_%d", i);
2357 ring->use_doorbell = true;
2358 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2359 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2360 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2361 AMDGPU_RING_PRIO_DEFAULT, NULL);
2366 /* set up the compute queues - allocate horizontally across pipes */
2368 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2369 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2370 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2371 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2374 r = gfx_v9_0_compute_ring_init(adev,
2385 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2387 DRM_ERROR("Failed to init KIQ BOs!\n");
2391 kiq = &adev->gfx.kiq;
2392 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2396 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2397 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2401 adev->gfx.ce_ram_size = 0x8000;
2403 r = gfx_v9_0_gpu_early_init(adev);
2411 static int gfx_v9_0_sw_fini(void *handle)
2414 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2416 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2417 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2418 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2419 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2421 amdgpu_gfx_mqd_sw_fini(adev);
2422 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2423 amdgpu_gfx_kiq_fini(adev);
2425 gfx_v9_0_mec_fini(adev);
2426 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2427 &adev->gfx.rlc.clear_state_gpu_addr,
2428 (void **)&adev->gfx.rlc.cs_ptr);
2429 if (adev->flags & AMD_IS_APU) {
2430 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2431 &adev->gfx.rlc.cp_table_gpu_addr,
2432 (void **)&adev->gfx.rlc.cp_table_ptr);
2434 gfx_v9_0_free_microcode(adev);
2440 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
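/*
 * gfx_v9_0_select_se_sh - steer register access to one SE/SH/instance
 *
 * Programs GRBM_GFX_INDEX; 0xffffffff in a field selects broadcast
 * writes for that field. Callers serialize with grbm_idx_mutex.
 */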
2445 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2450 if (instance == 0xffffffff)
2451 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2453 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2455 if (se_num == 0xffffffff)
2456 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2458 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2460 if (sh_num == 0xffffffff)
2461 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2463 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2465 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2468 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2472 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2473 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2475 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2476 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2478 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2479 adev->gfx.config.max_sh_per_se);
2481 return (~data) & mask;
2484 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2489 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2490 adev->gfx.config.max_sh_per_se;
2492 mutex_lock(&adev->grbm_idx_mutex);
2493 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2494 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2495 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2496 data = gfx_v9_0_get_rb_active_bitmap(adev);
2497 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2498 rb_bitmap_width_per_sh);
2501 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2502 mutex_unlock(&adev->grbm_idx_mutex);
2504 adev->gfx.config.backend_enable_mask = active_rbs;
2505 adev->gfx.config.num_rbs = hweight32(active_rbs);
2508 #define DEFAULT_SH_MEM_BASES (0x6000)
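/*
 * gfx_v9_0_init_compute_vmid - set up the KFD compute VMIDs
 *
 * Points SH_MEM_BASES of every compute VMID at the 0x6000 aperture
 * bases described in the comment below and clears their GDS/GWS/OA
 * allocations.
 */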
2509 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2512 uint32_t sh_mem_config;
2513 uint32_t sh_mem_bases;
2516 * Configure apertures:
2517 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2518 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2519 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2521 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2523 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2524 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2525 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2527 mutex_lock(&adev->srbm_mutex);
2528 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2529 soc15_grbm_select(adev, 0, 0, 0, i);
2530 /* CP and shaders */
2531 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2532 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2534 soc15_grbm_select(adev, 0, 0, 0, 0);
2535 mutex_unlock(&adev->srbm_mutex);
2537 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2538 access. These should be enabled by FW for target VMIDs. */
2539 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2540 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2541 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2542 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2543 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2547 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2552 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2553 * access. Compute VMIDs should be enabled by FW for target VMIDs;
2554 * the driver can enable them for graphics. VMID0 should maintain
2555 * access so that HWS firmware can save/restore entries.
2557 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2558 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2559 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2560 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2561 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2565 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2569 switch (adev->ip_versions[GC_HWIP][0]) {
2570 case IP_VERSION(9, 4, 1):
2571 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2572 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2573 DISABLE_BARRIER_WAITCNT, 1);
2574 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2581 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2586 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2588 gfx_v9_0_tiling_mode_table_init(adev);
2590 gfx_v9_0_setup_rb(adev);
2591 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2592 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2594 /* XXX SH_MEM regs */
2595 /* where to put LDS, scratch, GPUVM in FSA64 space */
2596 mutex_lock(&adev->srbm_mutex);
2597 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2598 soc15_grbm_select(adev, 0, 0, 0, i);
2599 /* CP and shaders */
2601 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2602 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2603 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2604 !!adev->gmc.noretry);
2605 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2606 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2608 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2609 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2610 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2611 !!adev->gmc.noretry);
2612 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2613 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2614 (adev->gmc.private_aperture_start >> 48));
2615 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2616 (adev->gmc.shared_aperture_start >> 48));
2617 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2620 soc15_grbm_select(adev, 0, 0, 0, 0);
2622 mutex_unlock(&adev->srbm_mutex);
2624 gfx_v9_0_init_compute_vmid(adev);
2625 gfx_v9_0_init_gds_vmid(adev);
2626 gfx_v9_0_init_sq_config(adev);
2629 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2634 mutex_lock(&adev->grbm_idx_mutex);
2635 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2636 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2637 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2638 for (k = 0; k < adev->usec_timeout; k++) {
2639 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2643 if (k == adev->usec_timeout) {
2644 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2645 0xffffffff, 0xffffffff);
2646 mutex_unlock(&adev->grbm_idx_mutex);
2647 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2653 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2654 mutex_unlock(&adev->grbm_idx_mutex);
2656 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2657 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2658 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2659 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2660 for (k = 0; k < adev->usec_timeout; k++) {
2661 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2667 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2672 /* These interrupts should be enabled to drive DS clock */
2674 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2676 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2677 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2678 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2679 if (adev->gfx.num_gfx_rings)
2680 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2682 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2685 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2687 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2689 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2690 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2691 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2692 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2693 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2694 adev->gfx.rlc.clear_state_size);
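/*
 * gfx_v9_1_parse_ind_reg_list - scan the RLC register list format blob
 *
 * Starting at @indirect_offset, records where each indirect block
 * begins in @indirect_start_offsets and collects the unique indirect
 * register offsets referenced by the list into @unique_indirect_regs.
 */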
2697 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2698 int indirect_offset,
2700 int *unique_indirect_regs,
2701 int unique_indirect_reg_count,
2702 int *indirect_start_offsets,
2703 int *indirect_start_offsets_count,
2704 int max_start_offsets_count)
2708 for (; indirect_offset < list_size; indirect_offset++) {
2709 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2710 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2711 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2713 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2714 indirect_offset += 2;
2716 /* look for the matching index */
2717 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2718 if (unique_indirect_regs[idx] ==
2719 register_list_format[indirect_offset] ||
2720 !unique_indirect_regs[idx])
2724 BUG_ON(idx >= unique_indirect_reg_count);
2726 if (!unique_indirect_regs[idx])
2727 unique_indirect_regs[idx] = register_list_format[indirect_offset];
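/*
 * gfx_v9_1_init_rlc_save_restore_list - upload the RLC SRM lists
 *
 * Writes the direct register restore table into RLC SRM ARAM, the
 * register list format, list size and starting offsets into RLC GPM
 * scratch, and binds each unique indirect register to an
 * RLC_SRM_INDEX_CNTL_ADDR/DATA slot.
 */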
2734 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2736 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2737 int unique_indirect_reg_count = 0;
2739 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2740 int indirect_start_offsets_count = 0;
2746 u32 *register_list_format =
2747 kmemdup(adev->gfx.rlc.register_list_format,
2748 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2749 if (!register_list_format)
2752 /* setup unique_indirect_regs array and indirect_start_offsets array */
2753 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2754 gfx_v9_1_parse_ind_reg_list(register_list_format,
2755 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2756 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2757 unique_indirect_regs,
2758 unique_indirect_reg_count,
2759 indirect_start_offsets,
2760 &indirect_start_offsets_count,
2761 ARRAY_SIZE(indirect_start_offsets));
2763 /* enable auto inc in case it is disabled */
2764 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2765 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2766 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2768 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2769 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2770 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2771 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2772 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2773 adev->gfx.rlc.register_restore[i]);
2775 /* load indirect register */
2776 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2777 adev->gfx.rlc.reg_list_format_start);
2779 /* direct register portion */
2780 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2781 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2782 register_list_format[i]);
2784 /* indirect register portion */
2785 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2786 if (register_list_format[i] == 0xFFFFFFFF) {
2787 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2791 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2792 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2794 for (j = 0; j < unique_indirect_reg_count; j++) {
2795 if (register_list_format[i] == unique_indirect_regs[j]) {
2796 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2801 BUG_ON(j >= unique_indirect_reg_count);
2806 /* set save/restore list size */
2807 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2808 list_size = list_size >> 1;
2809 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2810 adev->gfx.rlc.reg_restore_list_size);
2811 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2813 /* write the starting offsets to RLC scratch ram */
2814 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2815 adev->gfx.rlc.starting_offsets_start);
2816 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2817 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2818 indirect_start_offsets[i]);
2820 /* load unique indirect regs */
2821 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2822 if (unique_indirect_regs[i] != 0) {
2823 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2824 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2825 unique_indirect_regs[i] & 0x3FFFF);
2827 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2828 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2829 unique_indirect_regs[i] >> 20);
2833 kfree(register_list_format);
2837 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2839 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2842 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2846 uint32_t default_data = 0;
2848 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2850 /* enable GFXIP control over CGPG */
2851 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2852 if (default_data != data)
2853 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2856 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2857 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2858 if (default_data != data)
2859 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2861 /* disable GFXIP control over CGPG */
2862 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2863 if (default_data != data)
2864 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
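/*
 * gfx_v9_0_init_gfx_power_gating - program GFX power-gating timing
 *
 * When any GFX PG feature is enabled, sets the CP idle poll count, the
 * RLC power up/down and serdes command delays and the GRBM register
 * save idle threshold, then hands CGPG control to GFXIP (except on
 * GC 9.3.0).
 */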
2868 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2872 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2873 AMD_PG_SUPPORT_GFX_SMG |
2874 AMD_PG_SUPPORT_GFX_DMG)) {
2875 /* init IDLE_POLL_COUNT = 0x60 */
2876 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2877 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2878 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2879 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2881 /* init RLC PG Delay */
2883 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2884 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2885 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2886 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2887 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2889 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2890 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2891 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2892 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2894 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2895 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2896 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2897 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2899 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2900 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2902 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2903 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2904 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2905 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2906 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2910 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2914 uint32_t default_data = 0;
2916 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2917 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2918 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2920 if (default_data != data)
2921 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2924 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2928 uint32_t default_data = 0;
2930 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2931 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2932 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2934 if (default_data != data)
2935 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2938 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2942 uint32_t default_data = 0;
2944 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2945 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2948 if (default_data != data)
2949 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2952 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2955 uint32_t data, default_data;
2957 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2958 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2959 GFX_POWER_GATING_ENABLE,
2961 if (default_data != data)
2962 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2965 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2968 uint32_t data, default_data;
2970 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2971 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2972 GFX_PIPELINE_PG_ENABLE,
2974 if (default_data != data)
2975 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2978 /* read any GFX register to wake up GFX */
2979 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2982 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2985 uint32_t data, default_data;
2987 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2988 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2989 STATIC_PER_CU_PG_ENABLE,
2991 if (default_data != data)
2992 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2995 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2998 uint32_t data, default_data;
3000 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3001 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3002 DYN_PER_CU_PG_ENABLE,
3004 if (default_data != data)
3005 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3008 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3010 gfx_v9_0_init_csb(adev);
3013 * The RLC save/restore list is available since RLC v2.1
3014 * and is needed by the gfxoff feature.
3016 if (adev->gfx.rlc.is_rlc_v2_1) {
3017 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3018 (adev->apu_flags & AMD_APU_IS_RAVEN2))
3019 gfx_v9_1_init_rlc_save_restore_list(adev);
3020 gfx_v9_0_enable_save_restore_machine(adev);
3023 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3024 AMD_PG_SUPPORT_GFX_SMG |
3025 AMD_PG_SUPPORT_GFX_DMG |
3027 AMD_PG_SUPPORT_GDS |
3028 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3029 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3030 adev->gfx.rlc.cp_table_gpu_addr >> 8);
3031 gfx_v9_0_init_gfx_power_gating(adev);
3035 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3037 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3038 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3039 gfx_v9_0_wait_for_rlc_serdes(adev);
3042 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3044 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3046 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3050 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3052 #ifdef AMDGPU_RLC_DEBUG_RETRY
3056 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3059 /* on APUs (carrizo) the cp interrupt is enabled only after the cp is initialized */
3060 if (!(adev->flags & AMD_IS_APU)) {
3061 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3065 #ifdef AMDGPU_RLC_DEBUG_RETRY
3066 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3067 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3068 if (rlc_ucode_ver == 0x108) {
3069 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3070 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3071 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3072 * default is 0x9C4 to create a 100us interval */
3073 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3074 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3075 * to disable the page fault retry interrupts, default is 0x100 */
3077 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3082 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3084 const struct rlc_firmware_header_v2_0 *hdr;
3085 const __le32 *fw_data;
3086 unsigned i, fw_size;
3088 if (!adev->gfx.rlc_fw)
3091 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3092 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3094 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3095 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3096 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3098 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3099 RLCG_UCODE_LOADING_START_ADDRESS);
3100 for (i = 0; i < fw_size; i++)
3101 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3102 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
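/*
 * gfx_v9_0_rlc_resume - (re)start the RLC
 *
 * Under SR-IOV only the CSB is re-initialized. Otherwise the RLC is
 * stopped, CGCG is disabled, power gating and the CSB are programmed,
 * the RLC ucode is loaded legacy-style when PSP loading is not used,
 * LBPW is applied per ASIC and the RLC is started.
 */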
3107 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3111 if (amdgpu_sriov_vf(adev)) {
3112 gfx_v9_0_init_csb(adev);
3116 adev->gfx.rlc.funcs->stop(adev);
3119 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3121 gfx_v9_0_init_pg(adev);
3123 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3124 /* legacy rlc firmware loading */
3125 r = gfx_v9_0_rlc_load_microcode(adev);
3130 switch (adev->ip_versions[GC_HWIP][0]) {
3131 case IP_VERSION(9, 2, 2):
3132 case IP_VERSION(9, 1, 0):
3133 if (amdgpu_lbpw == 0)
3134 gfx_v9_0_enable_lbpw(adev, false);
3136 gfx_v9_0_enable_lbpw(adev, true);
3138 case IP_VERSION(9, 4, 0):
3139 if (amdgpu_lbpw > 0)
3140 gfx_v9_0_enable_lbpw(adev, true);
3142 gfx_v9_0_enable_lbpw(adev, false);
3148 adev->gfx.rlc.funcs->start(adev);
3153 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3155 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3157 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3158 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3159 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3160 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
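/*
 * gfx_v9_0_cp_gfx_load_microcode - legacy front-door CP gfx load
 *
 * Halts the gfx CP, then streams the PFP, CE and ME images into their
 * UCODE_DATA/RAM_DATA registers and leaves the firmware version in the
 * corresponding address register.
 */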
3164 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3166 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3167 const struct gfx_firmware_header_v1_0 *ce_hdr;
3168 const struct gfx_firmware_header_v1_0 *me_hdr;
3169 const __le32 *fw_data;
3170 unsigned i, fw_size;
3172 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3175 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3176 adev->gfx.pfp_fw->data;
3177 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3178 adev->gfx.ce_fw->data;
3179 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3180 adev->gfx.me_fw->data;
3182 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3183 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3184 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3186 gfx_v9_0_cp_gfx_enable(adev, false);
3189 fw_data = (const __le32 *)
3190 (adev->gfx.pfp_fw->data +
3191 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3192 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3193 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3194 for (i = 0; i < fw_size; i++)
3195 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3196 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3199 fw_data = (const __le32 *)
3200 (adev->gfx.ce_fw->data +
3201 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3202 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3203 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3204 for (i = 0; i < fw_size; i++)
3205 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3206 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3209 fw_data = (const __le32 *)
3210 (adev->gfx.me_fw->data +
3211 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3212 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3213 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3214 for (i = 0; i < fw_size; i++)
3215 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3216 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
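/*
 * gfx_v9_0_cp_gfx_start - bring up the gfx CP
 *
 * Programs CP_MAX_CONTEXT/CP_DEVICE_ID, un-halts the CP and submits
 * the clear-state preamble, context-control, SET_BASE and
 * VGT_INDEX_TYPE packets on gfx ring 0.
 */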
3221 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3223 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3224 const struct cs_section_def *sect = NULL;
3225 const struct cs_extent_def *ext = NULL;
3229 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3230 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3232 gfx_v9_0_cp_gfx_enable(adev, true);
3234 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3236 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3240 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3241 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3243 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3244 amdgpu_ring_write(ring, 0x80000000);
3245 amdgpu_ring_write(ring, 0x80000000);
3247 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3248 for (ext = sect->section; ext->extent != NULL; ++ext) {
3249 if (sect->id == SECT_CONTEXT) {
3250 amdgpu_ring_write(ring,
3251 PACKET3(PACKET3_SET_CONTEXT_REG,
3253 amdgpu_ring_write(ring,
3254 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3255 for (i = 0; i < ext->reg_count; i++)
3256 amdgpu_ring_write(ring, ext->extent[i]);
3261 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3262 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3264 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3265 amdgpu_ring_write(ring, 0);
3267 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3268 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3269 amdgpu_ring_write(ring, 0x8000);
3270 amdgpu_ring_write(ring, 0x8000);
3272 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3273 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3274 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3275 amdgpu_ring_write(ring, tmp);
3276 amdgpu_ring_write(ring, 0);
3278 amdgpu_ring_commit(ring);
3283 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3285 struct amdgpu_ring *ring;
3288 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3290 /* Set the write pointer delay */
3291 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3293 /* set the RB to use vmid 0 */
3294 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3296 /* Set ring buffer size */
3297 ring = &adev->gfx.gfx_ring[0];
3298 rb_bufsz = order_base_2(ring->ring_size / 8);
3299 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3300 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3302 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3304 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3306 /* Initialize the ring buffer's write pointers */
3308 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3309 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3311 /* set the wb address whether it's enabled or not */
3312 rptr_addr = ring->rptr_gpu_addr;
3313 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3314 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3316 wptr_gpu_addr = ring->wptr_gpu_addr;
3317 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3318 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3321 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3323 rb_addr = ring->gpu_addr >> 8;
3324 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3325 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
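/* program the gfx ring doorbell: set its offset and enable it only when the ring uses one */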
3327 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3328 if (ring->use_doorbell) {
3329 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3330 DOORBELL_OFFSET, ring->doorbell_index);
3331 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3334 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3336 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3338 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3339 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3340 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3342 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3343 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3346 /* start the ring */
3347 gfx_v9_0_cp_gfx_start(adev);
3348 ring->sched.ready = true;
3353 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3356 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3358 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3359 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3360 adev->gfx.kiq.ring.sched.ready = false;
3365 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3367 const struct gfx_firmware_header_v1_0 *mec_hdr;
3368 const __le32 *fw_data;
3372 if (!adev->gfx.mec_fw)
3375 gfx_v9_0_cp_compute_enable(adev, false);
3377 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3378 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3380 fw_data = (const __le32 *)
3381 (adev->gfx.mec_fw->data +
3382 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
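/* point the CPC instruction cache at the MEC firmware copy in GPU memory */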
3384 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3385 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3386 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3388 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3389 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3390 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3391 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
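/* MEC1: load the ucode jump table */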
3394 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3395 mec_hdr->jt_offset);
3396 for (i = 0; i < mec_hdr->jt_size; i++)
3397 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3398 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3400 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3401 adev->gfx.mec_fw_version);
3402 /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3408 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3411 struct amdgpu_device *adev = ring->adev;
3413 /* tell the RLC which queue is the KIQ */
3414 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3416 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3417 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3419 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3422 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3424 struct amdgpu_device *adev = ring->adev;
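/* high-priority compute queues get the maximum pipe/queue priority in their MQD */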
3426 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3427 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3428 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3429 mqd->cp_hqd_queue_priority =
3430 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3435 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3437 struct amdgpu_device *adev = ring->adev;
3438 struct v9_mqd *mqd = ring->mqd_ptr;
3439 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3442 mqd->header = 0xC0310800;
3443 mqd->compute_pipelinestat_enable = 0x00000001;
3444 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3445 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3446 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3447 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3448 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3449 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3450 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3451 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3452 mqd->compute_misc_reserved = 0x00000003;
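/* the dynamic CU mask lives in the same allocation as the MQD; record its GPU address */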
3454 mqd->dynamic_cu_mask_addr_lo =
3455 lower_32_bits(ring->mqd_gpu_addr
3456 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3457 mqd->dynamic_cu_mask_addr_hi =
3458 upper_32_bits(ring->mqd_gpu_addr
3459 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3461 eop_base_addr = ring->eop_gpu_addr >> 8;
3462 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3463 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3465 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3466 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3467 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3468 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3470 mqd->cp_hqd_eop_control = tmp;
3472 /* enable doorbell? */
3473 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3475 if (ring->use_doorbell) {
3476 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3477 DOORBELL_OFFSET, ring->doorbell_index);
3478 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3480 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3481 DOORBELL_SOURCE, 0);
3482 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3485 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3489 mqd->cp_hqd_pq_doorbell_control = tmp;
3491 /* disable the queue if it's active */
3493 mqd->cp_hqd_dequeue_request = 0;
3494 mqd->cp_hqd_pq_rptr = 0;
3495 mqd->cp_hqd_pq_wptr_lo = 0;
3496 mqd->cp_hqd_pq_wptr_hi = 0;
3498 /* set the pointer to the MQD */
3499 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3500 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3502 /* set MQD vmid to 0 */
3503 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3504 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3505 mqd->cp_mqd_control = tmp;
3507 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3508 hqd_gpu_addr = ring->gpu_addr >> 8;
3509 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3510 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3512 /* set up the HQD, this is similar to CP_RB0_CNTL */
3513 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3514 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3515 (order_base_2(ring->ring_size / 4) - 1));
3516 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3517 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3519 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3521 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3522 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3523 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3524 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3525 mqd->cp_hqd_pq_control = tmp;
3527 /* set the wb address whether it's enabled or not */
3528 wb_gpu_addr = ring->rptr_gpu_addr;
3529 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3530 mqd->cp_hqd_pq_rptr_report_addr_hi =
3531 upper_32_bits(wb_gpu_addr) & 0xffff;
3533 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3534 wb_gpu_addr = ring->wptr_gpu_addr;
3535 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3536 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3538 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3540 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3542 /* set the vmid for the queue */
3543 mqd->cp_hqd_vmid = 0;
3545 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3546 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3547 mqd->cp_hqd_persistent_state = tmp;
3549 /* set MIN_IB_AVAIL_SIZE */
3550 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3551 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3552 mqd->cp_hqd_ib_control = tmp;
3554 /* set static priority for a queue/ring */
3555 gfx_v9_0_mqd_set_priority(ring, mqd);
3556 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3558 /* the map_queues packet doesn't need to activate the queue,
3559 * so only the KIQ needs to set this field.
3561 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3562 mqd->cp_hqd_active = 1;
3567 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3569 struct amdgpu_device *adev = ring->adev;
3570 struct v9_mqd *mqd = ring->mqd_ptr;
3573 /* disable wptr polling */
3574 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3576 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3577 mqd->cp_hqd_eop_base_addr_lo);
3578 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3579 mqd->cp_hqd_eop_base_addr_hi);
3581 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3582 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3583 mqd->cp_hqd_eop_control);
3585 /* enable doorbell? */
3586 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3587 mqd->cp_hqd_pq_doorbell_control);
3589 /* disable the queue if it's active */
3590 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3591 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3592 for (j = 0; j < adev->usec_timeout; j++) {
3593 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3597 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3598 mqd->cp_hqd_dequeue_request);
3599 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3600 mqd->cp_hqd_pq_rptr);
3601 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3602 mqd->cp_hqd_pq_wptr_lo);
3603 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3604 mqd->cp_hqd_pq_wptr_hi);
3607 /* set the pointer to the MQD */
3608 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3609 mqd->cp_mqd_base_addr_lo);
3610 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3611 mqd->cp_mqd_base_addr_hi);
3613 /* set MQD vmid to 0 */
3614 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3615 mqd->cp_mqd_control);
3617 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3618 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3619 mqd->cp_hqd_pq_base_lo);
3620 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3621 mqd->cp_hqd_pq_base_hi);
3623 /* set up the HQD, this is similar to CP_RB0_CNTL */
3624 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3625 mqd->cp_hqd_pq_control);
3627 /* set the wb address whether it's enabled or not */
3628 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3629 mqd->cp_hqd_pq_rptr_report_addr_lo);
3630 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3631 mqd->cp_hqd_pq_rptr_report_addr_hi);
3633 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3634 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3635 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3636 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3637 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3639 /* enable the doorbell if requested */
3640 if (ring->use_doorbell) {
3641 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3642 (adev->doorbell_index.kiq * 2) << 2);
3643 /* If GC has entered CGPG, ringing a doorbell above the first page
3644 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3645 * work around this issue. And this change has to align with firmware
3648 if (check_if_enlarge_doorbell_range(adev))
3649 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3650 (adev->doorbell.size - 4));
3652 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3653 (adev->doorbell_index.userqueue_end * 2) << 2);
3656 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3657 mqd->cp_hqd_pq_doorbell_control);
3659 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3660 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3661 mqd->cp_hqd_pq_wptr_lo);
3662 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3663 mqd->cp_hqd_pq_wptr_hi);
3665 /* set the vmid for the queue */
3666 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3668 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3669 mqd->cp_hqd_persistent_state);
3671 /* activate the queue */
3672 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3673 mqd->cp_hqd_active);
3675 if (ring->use_doorbell)
3676 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3681 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3683 struct amdgpu_device *adev = ring->adev;
3686 /* disable the queue if it's active */
3687 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3689 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3691 for (j = 0; j < adev->usec_timeout; j++) {
3692 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3697 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3698 DRM_DEBUG("KIQ dequeue request failed.\n");
3700 /* Manually disable the queue if the dequeue request times out */
3701 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3704 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3708 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3709 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3710 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3711 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3712 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3713 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3714 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3715 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3720 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3722 struct amdgpu_device *adev = ring->adev;
3723 struct v9_mqd *mqd = ring->mqd_ptr;
3724 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3725 struct v9_mqd *tmp_mqd;
3727 gfx_v9_0_kiq_setting(ring);
3729 /* The GPU could be in a bad state during probe: the driver triggers a reset
3730 * after loading the SMU, and in that case the MQD has not been initialized.
3731 * The driver needs to re-init the MQD.
3732 * Check mqd->cp_hqd_pq_control, since this value should not be 0
3734 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3735 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3736 /* for the GPU_RESET case, reset the MQD to a clean state */
3737 if (adev->gfx.mec.mqd_backup[mqd_idx])
3738 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3740 /* reset ring buffer */
3742 amdgpu_ring_clear_ring(ring);
3744 mutex_lock(&adev->srbm_mutex);
3745 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3746 gfx_v9_0_kiq_init_register(ring);
3747 soc15_grbm_select(adev, 0, 0, 0, 0);
3748 mutex_unlock(&adev->srbm_mutex);
3750 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3751 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3752 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3753 mutex_lock(&adev->srbm_mutex);
3754 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3755 gfx_v9_0_mqd_init(ring);
3756 gfx_v9_0_kiq_init_register(ring);
3757 soc15_grbm_select(adev, 0, 0, 0, 0);
3758 mutex_unlock(&adev->srbm_mutex);
3760 if (adev->gfx.mec.mqd_backup[mqd_idx])
3761 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3767 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3769 struct amdgpu_device *adev = ring->adev;
3770 struct v9_mqd *mqd = ring->mqd_ptr;
3771 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3772 struct v9_mqd *tmp_mqd;
3774 /* Same as the KIQ init above: the driver needs to re-init the MQD if
3775 * mqd->cp_hqd_pq_control was not initialized before
3777 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3779 if (!tmp_mqd->cp_hqd_pq_control ||
3780 (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3781 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3782 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3783 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3784 mutex_lock(&adev->srbm_mutex);
3785 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3786 gfx_v9_0_mqd_init(ring);
3787 soc15_grbm_select(adev, 0, 0, 0, 0);
3788 mutex_unlock(&adev->srbm_mutex);
3790 if (adev->gfx.mec.mqd_backup[mqd_idx])
3791 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3792 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3793 /* reset MQD to a clean status */
3794 if (adev->gfx.mec.mqd_backup[mqd_idx])
3795 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3797 /* reset ring buffer */
3799 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3800 amdgpu_ring_clear_ring(ring);
3802 amdgpu_ring_clear_ring(ring);
3808 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3810 struct amdgpu_ring *ring;
3813 ring = &adev->gfx.kiq.ring;
3815 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3816 if (unlikely(r != 0))
3819 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3820 if (unlikely(r != 0))
3823 gfx_v9_0_kiq_init_queue(ring);
3824 amdgpu_bo_kunmap(ring->mqd_obj);
3825 ring->mqd_ptr = NULL;
3826 amdgpu_bo_unreserve(ring->mqd_obj);
3827 ring->sched.ready = true;
3831 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3833 struct amdgpu_ring *ring = NULL;
3836 gfx_v9_0_cp_compute_enable(adev, true);
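/* initialize the MQD of each user compute queue, then have the KIQ map them */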
3838 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3839 ring = &adev->gfx.compute_ring[i];
3841 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3842 if (unlikely(r != 0))
3844 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3846 r = gfx_v9_0_kcq_init_queue(ring);
3847 amdgpu_bo_kunmap(ring->mqd_obj);
3848 ring->mqd_ptr = NULL;
3850 amdgpu_bo_unreserve(ring->mqd_obj);
3855 r = amdgpu_gfx_enable_kcq(adev);
3860 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3863 struct amdgpu_ring *ring;
3865 if (!(adev->flags & AMD_IS_APU))
3866 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3868 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3869 if (adev->gfx.num_gfx_rings) {
3870 /* legacy firmware loading */
3871 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3876 r = gfx_v9_0_cp_compute_load_microcode(adev);
3881 r = gfx_v9_0_kiq_resume(adev);
3885 if (adev->gfx.num_gfx_rings) {
3886 r = gfx_v9_0_cp_gfx_resume(adev);
3891 r = gfx_v9_0_kcq_resume(adev);
3895 if (adev->gfx.num_gfx_rings) {
3896 ring = &adev->gfx.gfx_ring[0];
3897 r = amdgpu_ring_test_helper(ring);
3902 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3903 ring = &adev->gfx.compute_ring[i];
3904 amdgpu_ring_test_helper(ring);
3907 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3912 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3916 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3917 adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3920 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3921 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3922 adev->df.hash_status.hash_64k);
3923 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3924 adev->df.hash_status.hash_2m);
3925 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3926 adev->df.hash_status.hash_1g);
3927 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3930 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3932 if (adev->gfx.num_gfx_rings)
3933 gfx_v9_0_cp_gfx_enable(adev, enable);
3934 gfx_v9_0_cp_compute_enable(adev, enable);
3937 static int gfx_v9_0_hw_init(void *handle)
3940 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3942 if (!amdgpu_sriov_vf(adev))
3943 gfx_v9_0_init_golden_registers(adev);
3945 gfx_v9_0_constants_init(adev);
3947 gfx_v9_0_init_tcp_config(adev);
3949 r = adev->gfx.rlc.funcs->resume(adev);
3953 r = gfx_v9_0_cp_resume(adev);
3957 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3958 gfx_v9_4_2_set_power_brake_sequence(adev);
3963 static int gfx_v9_0_hw_fini(void *handle)
3965 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3967 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3968 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3969 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3971 /* DF freeze and KCQ disable would fail after a RAS fatal interrupt, so skip them */
3972 if (!amdgpu_ras_intr_triggered())
3973 /* disable the KCQs so the CPC stops touching memory that is no longer valid */
3974 amdgpu_gfx_disable_kcq(adev);
3976 if (amdgpu_sriov_vf(adev)) {
3977 gfx_v9_0_cp_gfx_enable(adev, false);
3978 /* must disable wptr polling for SRIOV when the hw has finished, otherwise
3979 * the CPC engine may keep fetching a WB address that is already
3980 * invalid after the sw has finished, and trigger a DMAR read error in
3983 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3987 /* Use the deinitialize sequence from CAIL when unbinding the device from the driver,
3988 * otherwise the KIQ hangs when binding it back
3990 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3991 mutex_lock(&adev->srbm_mutex);
3992 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3993 adev->gfx.kiq.ring.pipe,
3994 adev->gfx.kiq.ring.queue, 0);
3995 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3996 soc15_grbm_select(adev, 0, 0, 0, 0);
3997 mutex_unlock(&adev->srbm_mutex);
4000 gfx_v9_0_cp_enable(adev, false);
4002 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4003 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4004 (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4005 dev_dbg(adev->dev, "Skipping RLC halt\n");
4009 adev->gfx.rlc.funcs->stop(adev);
4013 static int gfx_v9_0_suspend(void *handle)
4015 return gfx_v9_0_hw_fini(handle);
4018 static int gfx_v9_0_resume(void *handle)
4020 return gfx_v9_0_hw_init(handle);
4023 static bool gfx_v9_0_is_idle(void *handle)
4025 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4027 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4028 GRBM_STATUS, GUI_ACTIVE))
4034 static int gfx_v9_0_wait_for_idle(void *handle)
4037 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4039 for (i = 0; i < adev->usec_timeout; i++) {
4040 if (gfx_v9_0_is_idle(handle))
4047 static int gfx_v9_0_soft_reset(void *handle)
4049 u32 grbm_soft_reset = 0;
4051 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
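/* GRBM_STATUS: request a CP/GFX soft reset if any graphics block reports busy */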
4054 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4055 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4056 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4057 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4058 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4059 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4060 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4061 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4062 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4063 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4064 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4067 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4068 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4069 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
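/* GRBM_STATUS2: a busy RLC additionally requires an RLC soft reset */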
4073 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4074 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4075 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4076 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4079 if (grbm_soft_reset) {
4081 adev->gfx.rlc.funcs->stop(adev);
4083 if (adev->gfx.num_gfx_rings)
4084 /* Disable GFX parsing/prefetching */
4085 gfx_v9_0_cp_gfx_enable(adev, false);
4087 /* Disable MEC parsing/prefetching */
4088 gfx_v9_0_cp_compute_enable(adev, false);
4090 if (grbm_soft_reset) {
4091 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4092 tmp |= grbm_soft_reset;
4093 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4094 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4095 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4099 tmp &= ~grbm_soft_reset;
4100 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4101 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4104 /* Wait a little for things to settle down */
4110 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4112 signed long r, cnt = 0;
4113 unsigned long flags;
4114 uint32_t seq, reg_val_offs = 0;
4116 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4117 struct amdgpu_ring *ring = &kiq->ring;
4119 BUG_ON(!ring->funcs->emit_rreg);
4121 spin_lock_irqsave(&kiq->ring_lock, flags);
4122 if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4123 pr_err("critical bug! too many kiq readers\n");
4126 amdgpu_ring_alloc(ring, 32);
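/* emit a COPY_DATA packet that writes the 64-bit GPU clock value into the writeback slot */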
4127 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4128 amdgpu_ring_write(ring, 9 | /* src: register*/
4129 (5 << 8) | /* dst: memory */
4130 (1 << 16) | /* count sel */
4131 (1 << 20)); /* write confirm */
4132 amdgpu_ring_write(ring, 0);
4133 amdgpu_ring_write(ring, 0);
4134 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4136 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4138 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4142 amdgpu_ring_commit(ring);
4143 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4145 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4147 /* don't wait any longer in the GPU-reset case because doing so may
4148 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
4149 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4150 * never return if we keep waiting in virt_kiq_rreg, which causes
4151 * gpu_recover() to hang there.
4153 * also don't wait in IRQ context
4155 if (r < 1 && (amdgpu_in_reset(adev)))
4156 goto failed_kiq_read;
4159 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4160 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4161 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4164 if (cnt > MAX_KIQ_REG_TRY)
4165 goto failed_kiq_read;
4168 value = (uint64_t)adev->wb.wb[reg_val_offs] |
4169 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4170 amdgpu_device_wb_free(adev, reg_val_offs);
4174 amdgpu_ring_undo(ring);
4176 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4179 amdgpu_device_wb_free(adev, reg_val_offs);
4180 pr_err("failed to read gpu clock\n");
4184 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4186 uint64_t clock, clock_lo, clock_hi, hi_check;
4188 switch (adev->ip_versions[GC_HWIP][0]) {
4189 case IP_VERSION(9, 3, 0):
4191 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4192 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4193 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4194 /* The SMUIO TSC clock frequency is 100 MHz, so the 32-bit counter carries over
4195 * roughly every 42 seconds.
4197 if (hi_check != clock_hi) {
4198 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4199 clock_hi = hi_check;
4202 clock = clock_lo | (clock_hi << 32ULL);
4205 amdgpu_gfx_off_ctrl(adev, false);
4206 mutex_lock(&adev->gfx.gpu_clock_mutex);
4207 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4208 clock = gfx_v9_0_kiq_read_clock(adev);
4210 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4211 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4212 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4214 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4215 amdgpu_gfx_off_ctrl(adev, true);
4221 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4223 uint32_t gds_base, uint32_t gds_size,
4224 uint32_t gws_base, uint32_t gws_size,
4225 uint32_t oa_base, uint32_t oa_size)
4227 struct amdgpu_device *adev = ring->adev;
4230 gfx_v9_0_write_data_to_reg(ring, 0, false,
4231 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4235 gfx_v9_0_write_data_to_reg(ring, 0, false,
4236 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4240 gfx_v9_0_write_data_to_reg(ring, 0, false,
4241 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4242 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4245 gfx_v9_0_write_data_to_reg(ring, 0, false,
4246 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4247 (1 << (oa_size + oa_base)) - (1 << oa_base));
4250 static const u32 vgpr_init_compute_shader[] =
4252 0xb07c0000, 0xbe8000ff,
4253 0x000000f8, 0xbf110800,
4254 0x7e000280, 0x7e020280,
4255 0x7e040280, 0x7e060280,
4256 0x7e080280, 0x7e0a0280,
4257 0x7e0c0280, 0x7e0e0280,
4258 0x80808800, 0xbe803200,
4259 0xbf84fff5, 0xbf9c0000,
4260 0xd28c0001, 0x0001007f,
4261 0xd28d0001, 0x0002027e,
4262 0x10020288, 0xb8810904,
4263 0xb7814000, 0xd1196a01,
4264 0x00000301, 0xbe800087,
4265 0xbefc00c1, 0xd89c4000,
4266 0x00020201, 0xd89cc080,
4267 0x00040401, 0x320202ff,
4268 0x00000800, 0x80808100,
4269 0xbf84fff8, 0x7e020280,
4270 0xbf810000, 0x00000000,
4273 static const u32 sgpr_init_compute_shader[] =
4275 0xb07c0000, 0xbe8000ff,
4276 0x0000005f, 0xbee50080,
4277 0xbe812c65, 0xbe822c65,
4278 0xbe832c65, 0xbe842c65,
4279 0xbe852c65, 0xb77c0005,
4280 0x80808500, 0xbf84fff8,
4281 0xbe800080, 0xbf810000,
4284 static const u32 vgpr_init_compute_shader_arcturus[] = {
4285 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4286 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4287 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4288 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4289 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4290 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4291 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4292 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4293 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4294 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4295 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4296 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4297 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4298 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4299 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4300 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4301 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4302 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4303 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4304 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4305 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4306 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4307 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4308 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4309 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4310 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4311 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4312 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4313 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4314 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4315 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4316 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4317 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4318 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4319 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4320 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4321 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4322 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4323 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4324 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4325 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4326 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4327 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4328 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4329 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4330 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4331 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4332 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4333 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4334 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4335 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4336 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4337 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4338 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4339 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4340 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4341 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4342 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4343 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4344 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4345 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4346 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4347 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4348 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4349 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4350 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4351 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4352 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4353 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4354 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4355 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4356 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4357 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4358 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4359 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4360 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4361 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4362 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4363 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4364 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4365 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4366 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4367 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4368 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4369 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4370 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4371 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4372 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4373 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4374 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4375 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4376 0xbf84fff8, 0xbf810000,
4379 /* When the register arrays below are changed, please update gpr_reg_size
4380 * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4381 * to cover all gfx9 ASICs */
4382 static const struct soc15_reg_entry vgpr_init_regs[] = {
4383 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4384 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4385 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4386 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4387 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4388 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4389 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4390 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4391 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4392 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4393 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4394 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4395 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4396 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4399 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4400 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4401 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4402 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4403 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4404 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4405 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4406 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4407 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4408 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4409 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4410 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4411 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4412 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4413 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4416 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4417 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4418 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4419 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4420 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4421 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4422 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4423 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4424 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4425 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4426 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4427 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4428 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4429 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4430 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4433 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4434 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4435 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4436 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4437 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4438 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4439 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4440 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4441 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4442 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4443 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4450 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4451 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4452 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4453 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4454 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4455 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4456 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4457 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4458 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4459 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4460 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4461 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4462 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4463 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4464 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4465 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4466 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4467 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4468 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4469 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4470 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4471 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4472 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4473 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4474 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4475 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4476 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4477 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4478 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4479 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4480 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4481 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4482 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4483 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4486 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4488 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4491 /* only supported when RAS is enabled */
4492 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4495 r = amdgpu_ring_alloc(ring, 7);
4497 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4502 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4503 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
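/* issue a CP DMA_DATA write covering the whole GDS so its ECC state gets initialized */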
4505 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4506 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4507 PACKET3_DMA_DATA_DST_SEL(1) |
4508 PACKET3_DMA_DATA_SRC_SEL(2) |
4509 PACKET3_DMA_DATA_ENGINE(0)));
4510 amdgpu_ring_write(ring, 0);
4511 amdgpu_ring_write(ring, 0);
4512 amdgpu_ring_write(ring, 0);
4513 amdgpu_ring_write(ring, 0);
4514 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4515 adev->gds.gds_size);
4517 amdgpu_ring_commit(ring);
4519 for (i = 0; i < adev->usec_timeout; i++) {
4520 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4525 if (i >= adev->usec_timeout)
4528 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4533 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4535 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4536 struct amdgpu_ib ib;
4537 struct dma_fence *f = NULL;
4539 unsigned total_size, vgpr_offset, sgpr_offset;
4542 int compute_dim_x = adev->gfx.config.max_shader_engines *
4543 adev->gfx.config.max_cu_per_sh *
4544 adev->gfx.config.max_sh_per_se;
4545 int sgpr_work_group_size = 5;
4546 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4547 int vgpr_init_shader_size;
4548 const u32 *vgpr_init_shader_ptr;
4549 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4551 /* only supported when RAS is enabled */
4552 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4555 /* bail if the compute ring is not ready */
4556 if (!ring->sched.ready)
4559 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4560 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4561 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4562 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4564 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4565 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4566 vgpr_init_regs_ptr = vgpr_init_regs;
4570 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4572 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4574 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4575 total_size = ALIGN(total_size, 256);
4576 vgpr_offset = total_size;
4577 total_size += ALIGN(vgpr_init_shader_size, 256);
4578 sgpr_offset = total_size;
4579 total_size += sizeof(sgpr_init_compute_shader);
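/* the IB holds three SET_SH_REG/DISPATCH sequences (VGPR, SGPR1, SGPR2) followed by
 * the VGPR and SGPR init shaders */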
4581 /* allocate an indirect buffer to put the commands in */
4582 memset(&ib, 0, sizeof(ib));
4583 r = amdgpu_ib_get(adev, NULL, total_size,
4584 AMDGPU_IB_POOL_DIRECT, &ib);
4586 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4590 /* load the compute shaders */
4591 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4592 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4594 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4595 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4597 /* init the ib length to 0 */
4601 /* write the register state for the compute dispatch */
4602 for (i = 0; i < gpr_reg_size; i++) {
4603 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4604 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4605 - PACKET3_SET_SH_REG_START;
4606 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4608 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4609 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4610 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4611 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4612 - PACKET3_SET_SH_REG_START;
4613 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4614 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4616 /* write dispatch packet */
4617 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4618 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4619 ib.ptr[ib.length_dw++] = 1; /* y */
4620 ib.ptr[ib.length_dw++] = 1; /* z */
4621 ib.ptr[ib.length_dw++] =
4622 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4624 /* write CS partial flush packet */
4625 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4626 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
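/* SGPR1 pass */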
4629 /* write the register state for the compute dispatch */
4630 for (i = 0; i < gpr_reg_size; i++) {
4631 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4632 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4633 - PACKET3_SET_SH_REG_START;
4634 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4636 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4637 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4638 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4639 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4640 - PACKET3_SET_SH_REG_START;
4641 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4642 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4644 /* write dispatch packet */
4645 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4646 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4647 ib.ptr[ib.length_dw++] = 1; /* y */
4648 ib.ptr[ib.length_dw++] = 1; /* z */
4649 ib.ptr[ib.length_dw++] =
4650 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4652 /* write CS partial flush packet */
4653 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4654 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
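/* SGPR2 pass */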
4657 /* write the register state for the compute dispatch */
4658 for (i = 0; i < gpr_reg_size; i++) {
4659 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4660 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4661 - PACKET3_SET_SH_REG_START;
4662 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4664 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4665 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4667 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4668 - PACKET3_SET_SH_REG_START;
4669 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4670 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4672 /* write dispatch packet */
4673 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4674 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4675 ib.ptr[ib.length_dw++] = 1; /* y */
4676 ib.ptr[ib.length_dw++] = 1; /* z */
4677 ib.ptr[ib.length_dw++] =
4678 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4680 /* write CS partial flush packet */
4681 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4682 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4684 /* schedule the IB on the ring */
4685 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4687 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4691 /* wait for the GPU to finish processing the IB */
4692 r = dma_fence_wait(f, false);
4694 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4699 amdgpu_ib_free(adev, &ib, NULL);
4705 static int gfx_v9_0_early_init(void *handle)
4707 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4709 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4710 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4711 adev->gfx.num_gfx_rings = 0;
4713 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4714 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4715 AMDGPU_MAX_COMPUTE_RINGS);
4716 gfx_v9_0_set_kiq_pm4_funcs(adev);
4717 gfx_v9_0_set_ring_funcs(adev);
4718 gfx_v9_0_set_irq_funcs(adev);
4719 gfx_v9_0_set_gds_init(adev);
4720 gfx_v9_0_set_rlc_funcs(adev);
4722 /* init rlcg reg access ctrl */
4723 gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4728 static int gfx_v9_0_ecc_late_init(void *handle)
4730 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4734 * Temporary workaround for an issue where CP firmware fails to
4735 * update the read pointer when CPDMA writes a clearing operation
4736 * to GDS in the suspend/resume sequence on several cards. So just
4737 * limit this operation to the cold boot sequence.
4739 if ((!adev->in_suspend) &&
4740 (adev->gds.gds_size)) {
4741 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4746 /* requires IBs so do in late init after IB pool is initialized */
4747 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4748 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4750 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4755 if (adev->gfx.ras &&
4756 adev->gfx.ras->enable_watchdog_timer)
4757 adev->gfx.ras->enable_watchdog_timer(adev);
4762 static int gfx_v9_0_late_init(void *handle)
4764 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4767 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4771 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4775 r = gfx_v9_0_ecc_late_init(handle);
4782 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4784 uint32_t rlc_setting;
4786 /* if RLC is not enabled, do nothing */
4787 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4788 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4794 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4799 data = RLC_SAFE_MODE__CMD_MASK;
4800 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4801 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4803 /* wait for RLC_SAFE_MODE */
4804 for (i = 0; i < adev->usec_timeout; i++) {
4805 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4811 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4815 data = RLC_SAFE_MODE__CMD_MASK;
4816 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4819 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4822 amdgpu_gfx_rlc_enter_safe_mode(adev);
4824 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4825 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4826 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4827 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4829 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4830 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4831 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4834 amdgpu_gfx_rlc_exit_safe_mode(adev);
4837 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4840 /* TODO: double check if we need to perform under safe mode */
4841 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4843 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4844 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4846 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4848 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4849 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4851 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4853 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4856 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4861 amdgpu_gfx_rlc_enter_safe_mode(adev);
4863 /* It is disabled by HW by default */
4864 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4865 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4866 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4868 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4869 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4871 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4872 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4873 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4875 /* only for Vega10 & Raven1 */
4876 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4879 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4881 /* MGLS is a global flag to control all MGLS in GFX */
4882 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4883 /* 2 - RLC memory Light sleep */
4884 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4885 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4886 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4888 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4890 /* 3 - CP memory Light sleep */
4891 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4892 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4893 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4895 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4899 /* 1 - MGCG_OVERRIDE */
4900 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4902 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4903 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4905 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4906 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4907 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4908 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4911 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4913 /* 2 - disable MGLS in RLC */
4914 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4915 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4916 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4917 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4920 /* 3 - disable MGLS in CP */
4921 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4922 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4923 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4924 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4928 amdgpu_gfx_rlc_exit_safe_mode(adev);
4931 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4936 if (!adev->gfx.num_gfx_rings)
4939 amdgpu_gfx_rlc_enter_safe_mode(adev);
4941 /* Enable 3D CGCG/CGLS */
4943 /* write cmd to clear the cgcg/cgls override */
4944 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4945 /* unset CGCG override */
4946 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4947 /* update CGCG and CGLS override bits */
4949 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4951 /* enable 3D CGCG FSM (0x0000363f) */
4952 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4954 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4955 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4956 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4958 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4960 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4961 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4962 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4964 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4966 /* set IDLE_POLL_COUNT(0x00900100) */
4967 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4968 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4969 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4971 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4973 /* Disable CGCG/CGLS */
4974 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4975 /* disable cgcg, cgls should be disabled */
4976 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4977 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4978 /* disable cgcg and cgls in FSM */
4980 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4983 amdgpu_gfx_rlc_exit_safe_mode(adev);
4986 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4991 amdgpu_gfx_rlc_enter_safe_mode(adev);
4993 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4994 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4995 /* unset CGCG override */
4996 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4997 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4998 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5000 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5001 /* update CGCG and CGLS override bits */
5003 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5005 /* enable cgcg FSM (0x0000363F) */
5006 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5008 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5009 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5010 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5012 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5013 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5014 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5015 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5016 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5018 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5020 /* set IDLE_POLL_COUNT(0x00900100) */
5021 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5022 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5023 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5025 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5027 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5028 /* reset CGCG/CGLS bits */
5029 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5030 /* disable cgcg and cgls in FSM */
5032 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5035 amdgpu_gfx_rlc_exit_safe_mode(adev);
5038 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5042 /* CGCG/CGLS should be enabled after MGCG/MGLS
5043 * === MGCG + MGLS ===
5045 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5046 /* === CGCG/CGLS for GFX 3D Only === */
5047 gfx_v9_0_update_3d_clock_gating(adev, enable);
5048 /* === CGCG + CGLS === */
5049 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5051 /* CGCG/CGLS should be disabled before MGCG/MGLS
5052 * === CGCG + CGLS ===
5054 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5055 /* === CGCG/CGLS for GFX 3D Only === */
5056 gfx_v9_0_update_3d_clock_gating(adev, enable);
5057 /* === MGCG + MGLS === */
5058 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
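/* Program the SPM (streaming performance monitor) VMID into
 * RLC_SPM_MC_CNTL with GFXOFF temporarily disabled around the
 * read-modify-write; the NO_KIQ accessors are used when SR-IOV
 * runs in one-VF mode.
 */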
5063 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5067 amdgpu_gfx_off_ctrl(adev, false);
5069 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5070 if (amdgpu_sriov_is_pp_one_vf(adev))
5071 data = RREG32_NO_KIQ(reg);
5073 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5075 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5076 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5078 if (amdgpu_sriov_is_pp_one_vf(adev))
5079 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5081 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5083 amdgpu_gfx_off_ctrl(adev, true);
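/* Scan an array of soc15_reg_rlcg entries and report whether the given
 * register offset falls inside the RLCG-protected range; used below by
 * gfx_v9_0_is_rlcg_access_range() to decide if an access must be routed
 * through the RLCG interface.
 */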
5086 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5088 struct soc15_reg_rlcg *entries, int arr_size)
5096 for (i = 0; i < arr_size; i++) {
5097 const struct soc15_reg_rlcg *entry;
5099 entry = &entries[i];
5100 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5108 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5110 return gfx_v9_0_check_rlcg_range(adev, offset,
5111 (void *)rlcg_access_gc_9_0,
5112 ARRAY_SIZE(rlcg_access_gc_9_0));
5115 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5116 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5117 .set_safe_mode = gfx_v9_0_set_safe_mode,
5118 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5119 .init = gfx_v9_0_rlc_init,
5120 .get_csb_size = gfx_v9_0_get_csb_size,
5121 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5122 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5123 .resume = gfx_v9_0_rlc_resume,
5124 .stop = gfx_v9_0_rlc_stop,
5125 .reset = gfx_v9_0_rlc_reset,
5126 .start = gfx_v9_0_rlc_start,
5127 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5128 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5131 static int gfx_v9_0_set_powergating_state(void *handle,
5132 enum amd_powergating_state state)
5134 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5135 bool enable = (state == AMD_PG_STATE_GATE);
5137 switch (adev->ip_versions[GC_HWIP][0]) {
5138 case IP_VERSION(9, 2, 2):
5139 case IP_VERSION(9, 1, 0):
5140 case IP_VERSION(9, 3, 0):
5142 amdgpu_gfx_off_ctrl(adev, false);
5144 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5145 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5146 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5148 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5149 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5152 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5153 gfx_v9_0_enable_cp_power_gating(adev, true);
5155 gfx_v9_0_enable_cp_power_gating(adev, false);
5157 /* update gfx cgpg state */
5158 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5160 /* update mgcg state */
5161 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5164 amdgpu_gfx_off_ctrl(adev, true);
5166 case IP_VERSION(9, 2, 1):
5167 amdgpu_gfx_off_ctrl(adev, enable);
5176 static int gfx_v9_0_set_clockgating_state(void *handle,
5177 enum amd_clockgating_state state)
5179 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5181 if (amdgpu_sriov_vf(adev))
5184 switch (adev->ip_versions[GC_HWIP][0]) {
5185 case IP_VERSION(9, 0, 1):
5186 case IP_VERSION(9, 2, 1):
5187 case IP_VERSION(9, 4, 0):
5188 case IP_VERSION(9, 2, 2):
5189 case IP_VERSION(9, 1, 0):
5190 case IP_VERSION(9, 4, 1):
5191 case IP_VERSION(9, 3, 0):
5192 case IP_VERSION(9, 4, 2):
5193 gfx_v9_0_update_gfx_clock_gating(adev,
5194 state == AMD_CG_STATE_GATE);
5202 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5204 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5207 if (amdgpu_sriov_vf(adev))
5210 /* AMD_CG_SUPPORT_GFX_MGCG */
5211 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5212 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5213 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5215 /* AMD_CG_SUPPORT_GFX_CGCG */
5216 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5217 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5218 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5220 /* AMD_CG_SUPPORT_GFX_CGLS */
5221 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5222 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5224 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5225 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5226 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5227 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5229 /* AMD_CG_SUPPORT_GFX_CP_LS */
5230 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5231 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5232 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5234 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5235 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5236 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5237 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5238 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5240 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5241 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5242 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5246 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5248 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5251 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5253 struct amdgpu_device *adev = ring->adev;
5256 /* XXX check if swapping is necessary on BE */
5257 if (ring->use_doorbell) {
5258 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5260 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5261 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5267 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5269 struct amdgpu_device *adev = ring->adev;
5271 if (ring->use_doorbell) {
5272 /* XXX check if swapping is necessary on BE */
5273 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5274 WDOORBELL64(ring->doorbell_index, ring->wptr);
5276 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5277 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
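/* Emit an HDP flush: pick the NBIO ref/mask bits for this ring's CP
 * engine (cp2/cp6 shifted by pipe for compute, cp0 for gfx) and wait on
 * the HDP flush request/done registers via a WAIT_REG_MEM sequence.
 */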
5281 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5283 struct amdgpu_device *adev = ring->adev;
5284 u32 ref_and_mask, reg_mem_engine;
5285 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5287 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5290 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5293 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5300 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5301 reg_mem_engine = 1; /* pfp */
5304 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5305 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5306 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5307 ref_and_mask, ref_and_mask, 0x20);
5310 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5311 struct amdgpu_job *job,
5312 struct amdgpu_ib *ib,
5315 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5316 u32 header, control = 0;
5318 if (ib->flags & AMDGPU_IB_FLAG_CE)
5319 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5321 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5323 control |= ib->length_dw | (vmid << 24);
5325 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5326 control |= INDIRECT_BUFFER_PRE_ENB(1);
5328 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5329 gfx_v9_0_ring_emit_de_meta(ring);
5332 amdgpu_ring_write(ring, header);
5333 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5334 amdgpu_ring_write(ring,
5338 lower_32_bits(ib->gpu_addr));
5339 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5340 amdgpu_ring_write(ring, control);
5343 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5344 struct amdgpu_job *job,
5345 struct amdgpu_ib *ib,
5348 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5349 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5351 /* Currently, there is a high probability of a wave ID mismatch
5352 * between ME and GDS, leading to a hw deadlock, because ME generates
5353 * different wave IDs than the GDS expects. This situation happens
5354 * randomly when at least 5 compute pipes use GDS ordered append.
5355 * The wave IDs generated by ME are also wrong after suspend/resume.
5356 * Those are probably bugs somewhere else in the kernel driver.
5358 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5359 * GDS to 0 for this ring (me/pipe).
5361 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5362 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5363 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5364 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5367 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5368 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5369 amdgpu_ring_write(ring,
5373 lower_32_bits(ib->gpu_addr));
5374 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5375 amdgpu_ring_write(ring, control);
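/* Emit a fence with a RELEASE_MEM packet: flush/invalidate the TC caches
 * (TC write-back only when AMDGPU_FENCE_FLAG_TC_WB_ONLY is set), write the
 * fence value as 32- or 64-bit data at 'addr', and optionally raise an
 * interrupt when AMDGPU_FENCE_FLAG_INT is requested.
 */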
5378 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5379 u64 seq, unsigned flags)
5381 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5382 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5383 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5385 /* RELEASE_MEM - flush caches, send int */
5386 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5387 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5388 EOP_TC_NC_ACTION_EN) :
5389 (EOP_TCL1_ACTION_EN |
5391 EOP_TC_WB_ACTION_EN |
5392 EOP_TC_MD_ACTION_EN)) |
5393 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5395 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5398 * the address should be Qword aligned for a 64-bit write, and Dword
5399 * aligned when only the low 32 bits of data are written (data high is discarded)
5405 amdgpu_ring_write(ring, lower_32_bits(addr));
5406 amdgpu_ring_write(ring, upper_32_bits(addr));
5407 amdgpu_ring_write(ring, lower_32_bits(seq));
5408 amdgpu_ring_write(ring, upper_32_bits(seq));
5409 amdgpu_ring_write(ring, 0);
5412 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5414 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5415 uint32_t seq = ring->fence_drv.sync_seq;
5416 uint64_t addr = ring->fence_drv.gpu_addr;
5418 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5419 lower_32_bits(addr), upper_32_bits(addr),
5420 seq, 0xffffffff, 4);
5423 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5424 unsigned vmid, uint64_t pd_addr)
5426 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5428 /* compute doesn't have PFP */
5429 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5430 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5431 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5432 amdgpu_ring_write(ring, 0x0);
5436 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5438 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5441 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5445 /* XXX check if swapping is necessary on BE */
5446 if (ring->use_doorbell)
5447 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5453 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5455 struct amdgpu_device *adev = ring->adev;
5457 /* XXX check if swapping is necessary on BE */
5458 if (ring->use_doorbell) {
5459 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5460 WDOORBELL64(ring->doorbell_index, ring->wptr);
5462 BUG(); /* only DOORBELL method supported on gfx9 now */
5466 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5467 u64 seq, unsigned int flags)
5469 struct amdgpu_device *adev = ring->adev;
5471 /* we only allocate 32bit for each seq wb address */
5472 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5474 /* write fence seq to the "addr" */
5475 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5476 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5477 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5478 amdgpu_ring_write(ring, lower_32_bits(addr));
5479 amdgpu_ring_write(ring, upper_32_bits(addr));
5480 amdgpu_ring_write(ring, lower_32_bits(seq));
5482 if (flags & AMDGPU_FENCE_FLAG_INT) {
5483 /* set register to trigger INT */
5484 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5485 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5486 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5487 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5488 amdgpu_ring_write(ring, 0);
5489 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5493 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5495 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5496 amdgpu_ring_write(ring, 0);
5499 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5501 struct v9_ce_ib_state ce_payload = {0};
5505 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5506 csa_addr = amdgpu_csa_vaddr(ring->adev);
5508 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5509 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5510 WRITE_DATA_DST_SEL(8) |
5512 WRITE_DATA_CACHE_POLICY(0));
5513 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5514 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5515 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5518 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5520 struct v9_de_ib_state de_payload = {0};
5521 uint64_t csa_addr, gds_addr;
5524 csa_addr = amdgpu_csa_vaddr(ring->adev);
5525 gds_addr = csa_addr + 4096;
5526 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5527 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5529 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5530 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5531 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5532 WRITE_DATA_DST_SEL(8) |
5534 WRITE_DATA_CACHE_POLICY(0));
5535 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5536 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5537 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5540 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5543 uint32_t v = secure ? FRAME_TMZ : 0;
5545 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5546 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5549 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5553 if (amdgpu_sriov_vf(ring->adev))
5554 gfx_v9_0_ring_emit_ce_meta(ring);
5556 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5557 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5558 /* set load_global_config & load_global_uconfig */
5560 /* set load_cs_sh_regs */
5562 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5565 /* set load_ce_ram if preamble presented */
5566 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5569 /* still load_ce_ram if this is the first time the preamble is presented,
5570 * although no context switch actually happens.
5572 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5576 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5577 amdgpu_ring_write(ring, dw2);
5578 amdgpu_ring_write(ring, 0);
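/* Conditional-execution helpers: emit_init_cond_exec writes a COND_EXEC
 * packet with a dummy DW count (0x55aa55aa) and returns its ring offset;
 * emit_patch_cond_exec later overwrites that DW with the number of dwords
 * to skip when *cond_exe_gpu_addr reads back as zero.
 */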
5581 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5584 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5585 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5586 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5587 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5588 ret = ring->wptr & ring->buf_mask;
5589 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5593 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5596 BUG_ON(offset > ring->buf_mask);
5597 BUG_ON(ring->ring[offset] != 0x55aa55aa);
5599 cur = (ring->wptr & ring->buf_mask) - 1;
5600 if (likely(cur > offset))
5601 ring->ring[offset] = cur - offset;
5603 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5606 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5607 uint32_t reg_val_offs)
5609 struct amdgpu_device *adev = ring->adev;
5611 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5612 amdgpu_ring_write(ring, 0 | /* src: register */
5613 (5 << 8) | /* dst: memory */
5614 (1 << 20)); /* write confirm */
5615 amdgpu_ring_write(ring, reg);
5616 amdgpu_ring_write(ring, 0);
5617 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5619 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5623 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5628 switch (ring->funcs->type) {
5629 case AMDGPU_RING_TYPE_GFX:
5630 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5632 case AMDGPU_RING_TYPE_KIQ:
5633 cmd = (1 << 16); /* no inc addr */
5639 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5640 amdgpu_ring_write(ring, cmd);
5641 amdgpu_ring_write(ring, reg);
5642 amdgpu_ring_write(ring, 0);
5643 amdgpu_ring_write(ring, val);
5646 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5647 uint32_t val, uint32_t mask)
5649 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5652 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5653 uint32_t reg0, uint32_t reg1,
5654 uint32_t ref, uint32_t mask)
5656 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5657 struct amdgpu_device *adev = ring->adev;
5658 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5659 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5662 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5665 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5669 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5671 struct amdgpu_device *adev = ring->adev;
5674 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5675 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5676 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5677 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5678 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5681 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5682 enum amdgpu_interrupt_state state)
5685 case AMDGPU_IRQ_STATE_DISABLE:
5686 case AMDGPU_IRQ_STATE_ENABLE:
5687 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5688 TIME_STAMP_INT_ENABLE,
5689 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5696 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5698 enum amdgpu_interrupt_state state)
5700 u32 mec_int_cntl, mec_int_cntl_reg;
5703 * amdgpu controls only the first MEC. That's why this function only
5704 * handles the setting of interrupts for this specific MEC. All other
5705 * pipes' interrupts are set by amdkfd.
5711 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5714 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5717 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5720 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5723 DRM_DEBUG("invalid pipe %d\n", pipe);
5727 DRM_DEBUG("invalid me %d\n", me);
5732 case AMDGPU_IRQ_STATE_DISABLE:
5733 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5734 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5735 TIME_STAMP_INT_ENABLE, 0);
5736 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5738 case AMDGPU_IRQ_STATE_ENABLE:
5739 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5740 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5741 TIME_STAMP_INT_ENABLE, 1);
5742 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5749 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5750 struct amdgpu_irq_src *source,
5752 enum amdgpu_interrupt_state state)
5755 case AMDGPU_IRQ_STATE_DISABLE:
5756 case AMDGPU_IRQ_STATE_ENABLE:
5757 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5758 PRIV_REG_INT_ENABLE,
5759 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5768 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5769 struct amdgpu_irq_src *source,
5771 enum amdgpu_interrupt_state state)
5774 case AMDGPU_IRQ_STATE_DISABLE:
5775 case AMDGPU_IRQ_STATE_ENABLE:
5776 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5777 PRIV_INSTR_INT_ENABLE,
5778 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5787 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
5788 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5789 CP_ECC_ERROR_INT_ENABLE, 1)
5791 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
5792 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5793 CP_ECC_ERROR_INT_ENABLE, 0)
5795 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5796 struct amdgpu_irq_src *source,
5798 enum amdgpu_interrupt_state state)
5801 case AMDGPU_IRQ_STATE_DISABLE:
5802 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5803 CP_ECC_ERROR_INT_ENABLE, 0);
5804 DISABLE_ECC_ON_ME_PIPE(1, 0);
5805 DISABLE_ECC_ON_ME_PIPE(1, 1);
5806 DISABLE_ECC_ON_ME_PIPE(1, 2);
5807 DISABLE_ECC_ON_ME_PIPE(1, 3);
5810 case AMDGPU_IRQ_STATE_ENABLE:
5811 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5812 CP_ECC_ERROR_INT_ENABLE, 1);
5813 ENABLE_ECC_ON_ME_PIPE(1, 0);
5814 ENABLE_ECC_ON_ME_PIPE(1, 1);
5815 ENABLE_ECC_ON_ME_PIPE(1, 2);
5816 ENABLE_ECC_ON_ME_PIPE(1, 3);
5826 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5827 struct amdgpu_irq_src *src,
5829 enum amdgpu_interrupt_state state)
5832 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5833 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5835 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5836 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5838 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5839 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5841 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5842 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5844 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5845 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5847 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5848 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5850 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5851 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5853 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5854 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5856 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5857 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
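/* CP EOP interrupt handler. The IV ring_id encodes the queue in bits
 * [6:4], the ME in bits [3:2] and the pipe in bits [1:0]; e.g. ring_id
 * 0x15 decodes to me 1, pipe 1, queue 1. me_id 0 maps to the gfx ring,
 * other MEs are matched against the compute rings.
 */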
5865 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5866 struct amdgpu_irq_src *source,
5867 struct amdgpu_iv_entry *entry)
5870 u8 me_id, pipe_id, queue_id;
5871 struct amdgpu_ring *ring;
5873 DRM_DEBUG("IH: CP EOP\n");
5874 me_id = (entry->ring_id & 0x0c) >> 2;
5875 pipe_id = (entry->ring_id & 0x03) >> 0;
5876 queue_id = (entry->ring_id & 0x70) >> 4;
5880 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5884 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5885 ring = &adev->gfx.compute_ring[i];
5886 /* Per-queue interrupt is supported for MEC starting from VI.
5887 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5889 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5890 amdgpu_fence_process(ring);
5897 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5898 struct amdgpu_iv_entry *entry)
5900 u8 me_id, pipe_id, queue_id;
5901 struct amdgpu_ring *ring;
5904 me_id = (entry->ring_id & 0x0c) >> 2;
5905 pipe_id = (entry->ring_id & 0x03) >> 0;
5906 queue_id = (entry->ring_id & 0x70) >> 4;
5910 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5914 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5915 ring = &adev->gfx.compute_ring[i];
5916 if (ring->me == me_id && ring->pipe == pipe_id &&
5917 ring->queue == queue_id)
5918 drm_sched_fault(&ring->sched);
5924 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5925 struct amdgpu_irq_src *source,
5926 struct amdgpu_iv_entry *entry)
5928 DRM_ERROR("Illegal register access in command stream\n");
5929 gfx_v9_0_fault(adev, entry);
5933 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5934 struct amdgpu_irq_src *source,
5935 struct amdgpu_iv_entry *entry)
5937 DRM_ERROR("Illegal instruction in command stream\n");
5938 gfx_v9_0_fault(adev, entry);
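/* RAS EDC field table: maps each GFX EDC counter register to the bit
 * fields holding its SEC (correctable) and DED (uncorrectable) counts;
 * entries listing only a SED count have no DED field.
 * gfx_v9_0_ras_error_count() walks this table to accumulate the totals.
 */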
5943 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5944 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5945 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5946 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5948 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5949 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5950 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5952 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5953 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5956 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5957 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5960 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5961 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5962 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5964 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5965 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5968 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5969 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5970 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5972 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5973 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5974 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5976 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5977 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5980 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5981 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5984 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5985 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5988 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5989 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5990 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5992 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5993 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5996 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5997 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5998 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6000 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6001 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6002 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6003 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6005 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6006 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6007 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6010 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6011 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6012 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6013 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6015 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6016 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6017 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6018 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6020 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6021 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6022 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6023 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6025 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6026 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6027 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6028 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6030 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6031 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6034 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6035 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6036 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6038 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6039 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6042 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6043 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6046 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6047 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6050 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6051 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6054 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6055 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6058 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6059 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6062 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6063 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6064 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6066 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6067 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6068 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6070 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6071 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6072 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6074 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6075 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6076 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6078 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6079 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6080 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6082 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6083 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6086 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6087 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6090 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6091 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6094 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6095 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6098 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6099 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6102 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6103 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6106 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6107 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6110 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6111 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6114 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6115 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6118 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6119 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6122 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6123 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6126 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6127 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6130 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6131 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6134 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6135 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6138 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6139 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6140 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6142 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6143 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6144 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6146 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6147 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6150 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6151 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6154 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6155 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6158 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6159 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6160 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6162 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6163 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6164 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6166 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6167 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6168 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6170 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6171 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6172 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6174 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6175 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6178 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6179 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6180 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6182 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6183 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6184 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6186 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6187 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6188 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6190 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6191 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6192 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6194 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6195 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6196 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6198 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6199 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6200 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6202 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6203 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6204 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6206 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6207 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6208 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6210 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6211 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6212 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6214 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6215 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6216 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6218 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6219 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6220 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6222 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6223 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6224 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6226 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6227 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6228 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6230 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6231 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6232 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6234 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6235 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6236 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6238 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6239 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6240 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6242 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6243 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6244 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6246 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6247 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6250 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6251 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6254 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6255 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6258 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6259 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6262 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6263 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6266 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6267 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6268 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6270 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6271 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6272 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6274 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6275 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6276 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6278 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6279 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6280 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6282 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6283 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6284 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6286 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6287 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6290 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6291 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6294 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6295 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6298 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6299 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6302 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6303 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6306 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6307 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6308 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6310 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6311 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6312 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6314 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6315 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6316 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6318 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6319 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6320 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6322 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6323 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6324 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6326 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6327 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6330 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6331 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6334 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6335 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6338 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6339 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6342 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6343 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6346 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6347 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6348 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6350 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6351 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6352 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6354 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6355 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6356 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6358 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6359 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6362 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6363 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6366 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6367 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6370 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6371 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6374 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6375 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6378 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6379 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6384 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6387 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6389 struct ta_ras_trigger_error_input block_info = { 0 };
6391 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6394 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6397 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6400 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6402 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6403 ras_gfx_subblocks[info->head.sub_block_index].name,
6408 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6410 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6411 ras_gfx_subblocks[info->head.sub_block_index].name,
6416 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6417 block_info.sub_block_index =
6418 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6419 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6420 block_info.address = info->address;
6421 block_info.value = info->value;
6423 mutex_lock(&adev->grbm_idx_mutex);
6424 ret = psp_ras_trigger_error(&adev->psp, &block_info);
6425 mutex_unlock(&adev->grbm_idx_mutex);
6430 static const char *vml2_mems[] = {
6431 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6432 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6433 "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6434 "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6435 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6436 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6437 "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6438 "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6439 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6440 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6441 "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6442 "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6443 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6444 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6445 "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6446 "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6449 static const char *vml2_walker_mems[] = {
6450 "UTC_VML2_CACHE_PDE0_MEM0",
6451 "UTC_VML2_CACHE_PDE0_MEM1",
6452 "UTC_VML2_CACHE_PDE1_MEM0",
6453 "UTC_VML2_CACHE_PDE1_MEM1",
6454 "UTC_VML2_CACHE_PDE2_MEM0",
6455 "UTC_VML2_CACHE_PDE2_MEM1",
6456 "UTC_VML2_RDIF_LOG_FIFO",
6459 static const char *atc_l2_cache_2m_mems[] = {
6460 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6461 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6462 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6463 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6466 static const char *atc_l2_cache_4k_mems[] = {
6467 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6468 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6469 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6470 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6471 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6472 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6473 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6474 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6475 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6476 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6477 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6478 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6479 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6480 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6481 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6482 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6483 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6484 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6485 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6486 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6487 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6488 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6489 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6490 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6491 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6492 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6493 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6494 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6495 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6496 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6497 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6498 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6501 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6502 struct ras_err_data *err_data)
6505 uint32_t sec_count, ded_count;
6507 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6508 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6509 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6510 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6511 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6512 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6513 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6514 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6516 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6517 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6518 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6520 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6522 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6523 "SEC %d\n", i, vml2_mems[i], sec_count);
6524 err_data->ce_count += sec_count;
6527 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6529 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6530 "DED %d\n", i, vml2_mems[i], ded_count);
6531 err_data->ue_count += ded_count;
6535 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6536 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6537 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6539 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6542 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6543 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6544 err_data->ce_count += sec_count;
6547 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6550 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6551 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6552 err_data->ue_count += ded_count;
6556 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6557 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6558 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6560 sec_count = (data & 0x00006000L) >> 0xd;
6562 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6563 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6565 err_data->ce_count += sec_count;
6569 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6570 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6571 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6573 sec_count = (data & 0x00006000L) >> 0xd;
6575 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6576 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6578 err_data->ce_count += sec_count;
6581 ded_count = (data & 0x00018000L) >> 0xf;
6583 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6584 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6586 err_data->ue_count += ded_count;
6590 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6591 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6592 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6593 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6598 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6599 const struct soc15_reg_entry *reg,
6600 uint32_t se_id, uint32_t inst_id, uint32_t value,
6601 uint32_t *sec_count, uint32_t *ded_count)
6604 uint32_t sec_cnt, ded_cnt;
6606 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6607 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6608 gfx_v9_0_ras_fields[i].seg != reg->seg ||
6609 gfx_v9_0_ras_fields[i].inst != reg->inst)
6613 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6614 gfx_v9_0_ras_fields[i].sec_count_shift;
6616 dev_info(adev->dev, "GFX SubBlock %s, "
6617 "Instance[%d][%d], SEC %d\n",
6618 gfx_v9_0_ras_fields[i].name,
6621 *sec_count += sec_cnt;
6625 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6626 gfx_v9_0_ras_fields[i].ded_count_shift;
6628 dev_info(adev->dev, "GFX SubBlock %s, "
6629 "Instance[%d][%d], DED %d\n",
6630 gfx_v9_0_ras_fields[i].name,
6633 *ded_count += ded_cnt;

static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
{
	int i, j, k;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return;

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
			}
		}
	}
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
	mutex_unlock(&adev->grbm_idx_mutex);

	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
	}

	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}

static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	uint32_t sec_count = 0, ded_count = 0;
	uint32_t i, j, k;
	uint32_t reg_value;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return;

	err_data->ue_count = 0;
	err_data->ce_count = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0, k);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				if (reg_value)
					gfx_v9_0_ras_error_count(adev,
						&gfx_v9_0_edc_counter_regs[i],
						j, k, reg_value,
						&sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);
}

static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);      /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);    /* POLL_INTERVAL */
}
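
/* Sizing note (illustrative): the ACQUIRE_MEM packet above is one header
 * dword (PM4 count 5, i.e. six body dwords) plus the six payload writes,
 * seven ring dwords in total, which matches the "7" budgeted for
 * gfx_v9_0_emit_mem_sync in the .emit_frame_size sums of the ring funcs
 * below.
 */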

static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	uint32_t wcl_cs_reg;

	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;

	switch (pipe) {
	case 0:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
		break;
	case 1:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
		break;
	case 2:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
		break;
	case 3:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
	 * limit the number of gfx waves. Setting 5 bits makes sure gfx only
	 * gets around 25% of GPU resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring,
			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
			      val);

	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the 1st ME (CS pipes 0-3).
	 */
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
	}
}
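
/* Rough arithmetic behind the 25% figure above (illustration only,
 * assuming the 7-bit field is a fraction of full scale): full scale is
 * 0x7f (127), so the 5-bit value 0x1f (31) gives 31 / 127 ~= 24.4%,
 * i.e. roughly a quarter of the wave slots while a high priority compute
 * job is active.
 */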

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		8 + 8 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		7, /* gfx_v9_0_emit_mem_sync */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
		7 + /* gfx_v9_0_emit_mem_sync */
		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
};
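
/* Budget note (illustrative): a register write on this ring is costed at
 * 5 dwords in the sums above, so the wave-limit entries cover one write
 * to mmSPI_WCL_PIPE_PERCENT_GFX (5 dwords) plus up to three
 * mmSPI_WCL_PIPE_PERCENT_CS writes (3 * 5 = 15 dwords) emitted by
 * gfx_v9_0_emit_wave_limit_cs for the other compute pipes.
 */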

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 2):
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_size = 0x10000;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_size = 0x1000;
		break;
	case IP_VERSION(9, 4, 2):
		/* aldebaran removed all the GDS internal memory;
		 * only GWS opcodes such as barrier and semaphore
		 * are supported in the kernel.
		 */
		adev->gds.gds_size = 0;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case IP_VERSION(9, 2, 1):
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	case IP_VERSION(9, 4, 2):
		/* deprecated for Aldebaran, no usage at all */
		adev->gds.gds_compute_max_wave_id = 0;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which is usually suitable for Vega
			 * ASICs with their 4*2 SE/SH layout.
			 * But for Arcturus, the SE/SH layout is changed to 8*1.
			 * To minimize the impact, we make it compatible
			 * with the current bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}
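
/* Worked example of the remapping above (illustration only): with an
 * Arcturus-style 8*1 SE/SH layout, SE5/SH0 means i = 5, j = 0, so the CU
 * mask lands in bitmap[5 % 4][0 + 5 / 4] == bitmap[1][1], matching the
 * table in the comment; Vega-style layouts with i < 4 keep their natural
 * bitmap[i][j] slots.
 */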

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};