/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu_gfx.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"
#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0
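/*
 * Request/response flags for the RLC-gated (RLCG) indirect register path
 * used under SR-IOV: bits 31:28 of SCRATCH_REG1 carry the command
 * (read/write), while bits 26:24 are status bits the RLC reports back when
 * a request is rejected.  See gfx_v9_0_rlcg_rw() below for the handshake
 * that consumes these values.
 */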
#define GFX9_RLCG_GC_WRITE_OLD				(0x8 << 28)
#define GFX9_RLCG_GC_WRITE				(0x0 << 28)
#define GFX9_RLCG_GC_READ				(0x1 << 28)
#define GFX9_RLCG_VFGATE_DISABLED			0x4000000
#define GFX9_RLCG_WRONG_OPERATION_TYPE			0x2000000
#define GFX9_RLCG_NOT_IN_RANGE				0x1000000
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
#define mmTCP_CHAN_STEER_0_ARCT								0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX						0
#define mmTCP_CHAN_STEER_1_ARCT								0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX						0
#define mmTCP_CHAN_STEER_2_ARCT								0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX						0
#define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX						0
#define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX						0
#define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX						0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
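/*
 * RAS sub-block enumeration for GFX9.  Each hardware block (CPC, CPF, CPG,
 * GDS, SQ, SQC, TA, TCA, TCC, TCP, TD, EA, UTC) contributes a contiguous
 * run of entries bracketed by *_INDEX_START/*_INDEX_END markers, so a
 * sub-block can be addressed either by its absolute enum value or as an
 * offset within its parent block's range.
 */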
enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,

	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,

	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,

	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,

	TA_RAS_BLOCK__GFX_SPI_SR_MEM,

	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,

	TA_RAS_BLOCK__GFX_SQC_INDEX_START,

	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,

	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,

	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,

	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,

	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,

	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,

	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,

	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,

	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,

	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,

	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,

	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,

	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,

	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,

	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,

	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,

	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,

	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};
struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};
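/*
 * AMDGPU_RAS_SUB_BLOCK() packs the per-sub-block error capabilities into
 * the two bitmask members above: arguments a..d land in bits 0..3 of
 * hw_supported_error_type, and e..h land in bits 1, 3, 0 and 2 of
 * sw_supported_error_type.  For example, the GFX_CPC_SCRATCH entry
 * (0, 1, 1, 1, 1, 0, 0, 1) yields hw_supported_error_type = 0xe and
 * sw_supported_error_type = 0x6.
 */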
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                \
	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
		#subblock,                                                     \
		TA_RAS_BLOCK__##subblock,                                      \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
	}
static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};
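/*
 * "Golden" register tables.  Each SOC15_REG_GOLDEN_VALUE() entry names an
 * (IP, instance, register, and_mask, or_value) tuple; when a table is
 * applied with soc15_program_register_sequence(), the bits selected by
 * and_mask are cleared and replaced with or_value.  The common
 * golden_settings_gc_9_0/9_2_1 tables are refined by per-ASIC tables
 * (Vega10/12/20, Raven/Raven2, Renoir, Arcturus) below.
 */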
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};
static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};
static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
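/*
 * Indirect register access through the RLC for SR-IOV guests.  The VF
 * places the value in SCRATCH_REG0 and the dword address plus command flag
 * in SCRATCH_REG1, rings RLC_SPARE_INT, and then polls SCRATCH_REG1 until
 * the RLC clears the command bit (or reports one of the GFX9_RLCG_* error
 * bits).  Writes to GRBM_GFX_CNTL/GRBM_GFX_INDEX are additionally mirrored
 * into SCRATCH_REG2/SCRATCH_REG3.
 */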
static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag)
{
	static void *scratch_reg0;
	static void *scratch_reg1;
	static void *scratch_reg2;
	static void *scratch_reg3;
	static void *spare_int;
	static uint32_t grbm_cntl;
	static uint32_t grbm_idx;
	uint32_t i = 0;
	uint32_t retries = 50000;
	u32 ret = 0;
	u32 tmp;

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

	if (offset == grbm_cntl || offset == grbm_idx) {
		if (offset == grbm_cntl)
			writel(v, scratch_reg2);
		else if (offset == grbm_idx)
			writel(v, scratch_reg3);

		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
	} else {
		/*
		 * SCRATCH_REG0		= read/write value
		 * SCRATCH_REG1[30:28]	= command
		 * SCRATCH_REG1[19:0]	= address in dword
		 * SCRATCH_REG1[26:24]	= Error reporting
		 */
		writel(v, scratch_reg0);
		writel(offset | flag, scratch_reg1);
		writel(1, spare_int);

		for (i = 0; i < retries; i++) {
			tmp = readl(scratch_reg1);
			if (!(tmp & flag))
				break;

			udelay(10);
		}

		if (i >= retries) {
			if (amdgpu_sriov_reg_indirect_gc(adev)) {
				if (tmp & GFX9_RLCG_VFGATE_DISABLED)
					pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset);
				else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE)
					pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset);
				else if (tmp & GFX9_RLCG_NOT_IN_RANGE)
					pr_err("The register is not in range, program reg:0x%05x failed!\n", offset);
				else
					pr_err("Unknown error type, program reg:0x%05x failed!\n", offset);
			} else
				pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
		}
	}

	ret = readl(scratch_reg0);

	return ret;
}
static bool gfx_v9_0_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip,
				   int write, u32 *rlcg_flag)
{
	switch (hwip) {
	case GC_HWIP:
		if (amdgpu_sriov_reg_indirect_gc(adev)) {
			*rlcg_flag = write ? GFX9_RLCG_GC_WRITE : GFX9_RLCG_GC_READ;

			return true;
		/* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */
		} else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) {
			*rlcg_flag = GFX9_RLCG_GC_WRITE_OLD;
			return true;
		}
		break;
	default:
		break;
	}

	return false;
}
static u32 gfx_v9_0_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip)
{
	u32 rlcg_flag;

	if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag))
		return gfx_v9_0_rlcg_rw(adev, offset, 0, rlcg_flag);

	if (acc_flags & AMDGPU_REGS_NO_KIQ)
		return RREG32_NO_KIQ(offset);
	else
		return RREG32(offset);
}
static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
			       u32 value, u32 acc_flags, u32 hwip)
{
	u32 rlcg_flag;

	if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) {
		gfx_v9_0_rlcg_rw(adev, offset, value, rlcg_flag);
		return;
	}

	if (acc_flags & AMDGPU_REGS_NO_KIQ)
		WREG32_NO_KIQ(offset, value);
	else
		WREG32(offset, value);
}
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
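/*
 * KIQ (Kernel Interface Queue) PM4 packet builders.  The KIQ is a
 * privileged compute queue the driver uses to manage the other compute
 * queues; the helpers below emit the SET_RESOURCES, MAP_QUEUES,
 * UNMAP_QUEUES, QUERY_STATUS and INVALIDATE_TLBS packets on its ring.
 */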
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0* queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
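/*
 * Packet sizes below are in dwords and must match the packets emitted
 * above, e.g. map_queues_size = 7 covers the PACKET3 header plus the six
 * payload dwords written by gfx_v9_0_kiq_map_queues().
 */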
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
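/*
 * The scratch registers form a small pool of free GC registers that the
 * ring test below borrows to write and read back a magic value.
 */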
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
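/*
 * Basic sanity tests for a ring: seed a scratch register (or a writeback
 * slot for the IB test) with 0xCAFEDEAD, submit a packet that writes
 * 0xDEADBEEF to it, and poll until the new value shows up or the timeout
 * expires.
 */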
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
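/*
 * me/mec_fw_write_wait records whether the CP firmware is new enough to
 * handle a combined register write-and-wait sequence (used e.g. during VM
 * flushes); with older firmware the driver has to fall back to a separate
 * write followed by a polling wait.
 */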
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	     (adev->gfx.mec_feature_version < 46) ||
	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
	     (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 2, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 4, 0):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 2):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
		break;
	}
}
struct amdgpu_gfxoff_quirk {
	u16 chip_vendor;
	u16 chip_device;
	u16 subsys_vendor;
	u16 subsys_device;
	u8 revision;
};

static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
	{ 0, 0, 0, 0, 0 },
};
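/*
 * Boards matched by PCI vendor/device, subsystem IDs and revision in the
 * quirk list above have known-unstable GFXOFF; for them
 * gfx_v9_0_check_if_need_gfxoff() masks PP_GFXOFF_MASK so the feature
 * stays disabled.
 */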
static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
{
	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;

	while (p && p->chip_device != 0) {
		if (pdev->vendor == p->chip_vendor &&
		    pdev->device == p->chip_device &&
		    pdev->subsystem_vendor == p->subsys_vendor &&
		    pdev->subsystem_device == p->subsys_device &&
		    pdev->revision == p->revision) {
			return true;
		}
		++p;
	}
	return false;
}

static bool is_raven_kicker(struct amdgpu_device *adev)
{
	if (adev->pm.fw_version >= 0x41e2b)
		return true;
	else
		return false;
}
static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
{
	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
	    (adev->gfx.me_fw_version >= 0x000000a5) &&
	    (adev->gfx.me_feature_version >= 52))
		return true;
	else
		return false;
}
1375 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1377 if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1378 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1380 switch (adev->ip_versions[GC_HWIP][0]) {
1381 case IP_VERSION(9, 0, 1):
1382 case IP_VERSION(9, 2, 1):
1383 case IP_VERSION(9, 4, 0):
1385 case IP_VERSION(9, 2, 2):
1386 case IP_VERSION(9, 1, 0):
1387 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1388 (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1389 ((!is_raven_kicker(adev) &&
1390 adev->gfx.rlc_fw_version < 531) ||
1391 (adev->gfx.rlc_feature_version < 1) ||
1392 !adev->gfx.rlc.is_rlc_v2_1))
1393 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1395 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1396 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1398 AMD_PG_SUPPORT_RLC_SMU_HS;
1400 case IP_VERSION(9, 3, 0):
1401 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1402 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1404 AMD_PG_SUPPORT_RLC_SMU_HS;
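/*
 * The microcode init helpers below all follow the same pattern: request the
 * blob, validate it, parse the version fields out of its firmware header
 * and, when firmware is loaded through the PSP, register the image in
 * adev->firmware.ucode[] so that its page-aligned size is accounted for.
 * Roughly (sketch only, error handling omitted):
 *
 *	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 *	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 *	if (!err)
 *		err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 */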
1411 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1412 const char *chip_name)
1416 struct amdgpu_firmware_info *info = NULL;
1417 const struct common_firmware_header *header = NULL;
1418 const struct gfx_firmware_header_v1_0 *cp_hdr;
1420 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1421 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1424 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1427 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1428 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1429 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1431 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1432 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1435 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1438 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1439 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1440 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1442 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1443 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1446 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1449 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1450 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1451 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1453 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1454 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1455 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1456 info->fw = adev->gfx.pfp_fw;
1457 header = (const struct common_firmware_header *)info->fw->data;
1458 adev->firmware.fw_size +=
1459 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1461 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1462 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1463 info->fw = adev->gfx.me_fw;
1464 header = (const struct common_firmware_header *)info->fw->data;
1465 adev->firmware.fw_size +=
1466 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1468 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1469 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1470 info->fw = adev->gfx.ce_fw;
1471 header = (const struct common_firmware_header *)info->fw->data;
1472 adev->firmware.fw_size +=
1473 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1479 "gfx9: Failed to load firmware \"%s\"\n",
1481 release_firmware(adev->gfx.pfp_fw);
1482 adev->gfx.pfp_fw = NULL;
1483 release_firmware(adev->gfx.me_fw);
1484 adev->gfx.me_fw = NULL;
1485 release_firmware(adev->gfx.ce_fw);
1486 adev->gfx.ce_fw = NULL;
1491 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1492 const char *chip_name)
1496 struct amdgpu_firmware_info *info = NULL;
1497 const struct common_firmware_header *header = NULL;
1498 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1499 unsigned int *tmp = NULL;
1501 uint16_t version_major;
1502 uint16_t version_minor;
1503 uint32_t smu_version;
1506 * For a Picasso part on an AM4 socket board, we use picasso_rlc_am4.bin
1507 * instead of picasso_rlc.bin.
1509 * PCO AM4: revision 0xC8-0xCF
1510 * or revision 0xD8-0xDF;
1511 * otherwise the part is PCO FP5.
1513 if (!strcmp(chip_name, "picasso") &&
1514 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1515 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1516 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1517 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1518 (smu_version >= 0x41e2b))
1520 * The SMC is loaded by the SBIOS on APUs, so the driver can query the SMU version directly.
1522 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1524 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1525 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1528 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1529 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1531 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1532 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1533 if (version_major == 2 && version_minor == 1)
1534 adev->gfx.rlc.is_rlc_v2_1 = true;
1536 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1537 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1538 adev->gfx.rlc.save_and_restore_offset =
1539 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1540 adev->gfx.rlc.clear_state_descriptor_offset =
1541 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1542 adev->gfx.rlc.avail_scratch_ram_locations =
1543 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1544 adev->gfx.rlc.reg_restore_list_size =
1545 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1546 adev->gfx.rlc.reg_list_format_start =
1547 le32_to_cpu(rlc_hdr->reg_list_format_start);
1548 adev->gfx.rlc.reg_list_format_separate_start =
1549 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1550 adev->gfx.rlc.starting_offsets_start =
1551 le32_to_cpu(rlc_hdr->starting_offsets_start);
1552 adev->gfx.rlc.reg_list_format_size_bytes =
1553 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1554 adev->gfx.rlc.reg_list_size_bytes =
1555 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1556 adev->gfx.rlc.register_list_format =
1557 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1558 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1559 if (!adev->gfx.rlc.register_list_format) {
1564 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1565 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1566 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1567 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1569 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1571 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1572 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1573 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1574 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
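/*
 * Both lists live in the single allocation made above: the save/restore
 * entries are copied immediately after the format list, so register_restore
 * simply points reg_list_format_size_bytes / 4 entries into the same buffer.
 */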
1576 if (adev->gfx.rlc.is_rlc_v2_1)
1577 gfx_v9_0_init_rlc_ext_microcode(adev);
1579 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1580 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1581 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1582 info->fw = adev->gfx.rlc_fw;
1583 header = (const struct common_firmware_header *)info->fw->data;
1584 adev->firmware.fw_size +=
1585 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1587 if (adev->gfx.rlc.is_rlc_v2_1 &&
1588 adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1589 adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1590 adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1591 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1592 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1593 info->fw = adev->gfx.rlc_fw;
1594 adev->firmware.fw_size +=
1595 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1597 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1598 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1599 info->fw = adev->gfx.rlc_fw;
1600 adev->firmware.fw_size +=
1601 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1603 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1604 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1605 info->fw = adev->gfx.rlc_fw;
1606 adev->firmware.fw_size +=
1607 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1614 "gfx9: Failed to load firmware \"%s\"\n",
1616 release_firmware(adev->gfx.rlc_fw);
1617 adev->gfx.rlc_fw = NULL;
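/*
 * Only the GC IP versions listed below ship a separate MEC2 firmware image;
 * for the others gfx_v9_0_init_cp_compute_microcode() reuses the MEC1 image
 * and its version fields for MEC2.
 */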
1622 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1624 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1625 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1626 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1632 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1633 const char *chip_name)
1637 struct amdgpu_firmware_info *info = NULL;
1638 const struct common_firmware_header *header = NULL;
1639 const struct gfx_firmware_header_v1_0 *cp_hdr;
1641 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1642 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1645 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1648 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1649 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1650 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1653 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1654 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1655 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1657 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1660 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1661 adev->gfx.mec2_fw->data;
1662 adev->gfx.mec2_fw_version =
1663 le32_to_cpu(cp_hdr->header.ucode_version);
1664 adev->gfx.mec2_feature_version =
1665 le32_to_cpu(cp_hdr->ucode_feature_version);
1668 adev->gfx.mec2_fw = NULL;
1671 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1672 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1675 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1676 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1677 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1678 info->fw = adev->gfx.mec_fw;
1679 header = (const struct common_firmware_header *)info->fw->data;
1680 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1681 adev->firmware.fw_size +=
1682 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1684 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1685 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1686 info->fw = adev->gfx.mec_fw;
1687 adev->firmware.fw_size +=
1688 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
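/*
 * For PSP loading each MEC image is accounted for in two pieces: the main
 * ucode (total size minus the jump table) and the jump table itself
 * (jt_size dwords), each rounded up to a full page.
 */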
1690 if (adev->gfx.mec2_fw) {
1691 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1692 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1693 info->fw = adev->gfx.mec2_fw;
1694 header = (const struct common_firmware_header *)info->fw->data;
1695 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1696 adev->firmware.fw_size +=
1697 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1699 /* TODO: Determine if MEC2 JT FW loading can be removed
1700 for all GFX v9 ASICs and newer */
1701 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1702 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1703 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1704 info->fw = adev->gfx.mec2_fw;
1705 adev->firmware.fw_size +=
1706 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1713 gfx_v9_0_check_if_need_gfxoff(adev);
1714 gfx_v9_0_check_fw_write_wait(adev);
1717 "gfx9: Failed to load firmware \"%s\"\n",
1719 release_firmware(adev->gfx.mec_fw);
1720 adev->gfx.mec_fw = NULL;
1721 release_firmware(adev->gfx.mec2_fw);
1722 adev->gfx.mec2_fw = NULL;
1727 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1729 const char *chip_name;
1734 switch (adev->ip_versions[GC_HWIP][0]) {
1735 case IP_VERSION(9, 0, 1):
1736 chip_name = "vega10";
1738 case IP_VERSION(9, 2, 1):
1739 chip_name = "vega12";
1741 case IP_VERSION(9, 4, 0):
1742 chip_name = "vega20";
1744 case IP_VERSION(9, 2, 2):
1745 case IP_VERSION(9, 1, 0):
1746 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1747 chip_name = "raven2";
1748 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1749 chip_name = "picasso";
1751 chip_name = "raven";
1753 case IP_VERSION(9, 4, 1):
1754 chip_name = "arcturus";
1756 case IP_VERSION(9, 3, 0):
1757 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1758 chip_name = "renoir";
1760 chip_name = "green_sardine";
1762 case IP_VERSION(9, 4, 2):
1763 chip_name = "aldebaran";
1769 /* No CPG in Arcturus */
1770 if (adev->gfx.num_gfx_rings) {
1771 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1776 r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1780 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1787 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1790 const struct cs_section_def *sect = NULL;
1791 const struct cs_extent_def *ext = NULL;
1793 /* begin clear state */
1795 /* context control state */
1798 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1799 for (ext = sect->section; ext->extent != NULL; ++ext) {
1800 if (sect->id == SECT_CONTEXT)
1801 count += 2 + ext->reg_count;
1807 /* end clear state */
1815 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1816 volatile u32 *buffer)
1819 const struct cs_section_def *sect = NULL;
1820 const struct cs_extent_def *ext = NULL;
1822 if (adev->gfx.rlc.cs_data == NULL)
1827 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1828 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1830 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1831 buffer[count++] = cpu_to_le32(0x80000000);
1832 buffer[count++] = cpu_to_le32(0x80000000);
1834 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1835 for (ext = sect->section; ext->extent != NULL; ++ext) {
1836 if (sect->id == SECT_CONTEXT) {
1838 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1839 buffer[count++] = cpu_to_le32(ext->reg_index -
1840 PACKET3_SET_CONTEXT_REG_START);
1841 for (i = 0; i < ext->reg_count; i++)
1842 buffer[count++] = cpu_to_le32(ext->extent[i]);
1849 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1850 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1852 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1853 buffer[count++] = cpu_to_le32(0);
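/*
 * The resulting clear-state buffer layout (in dwords) is therefore:
 *   2             PREAMBLE_CNTL (begin clear state)
 *   3             CONTEXT_CONTROL
 *   2 + reg_count for every SECT_CONTEXT extent
 *   2             PREAMBLE_CNTL (end clear state)
 *   2             CLEAR_STATE
 * which is the same count returned by gfx_v9_0_get_csb_size().
 */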
1856 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1858 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1859 uint32_t pg_always_on_cu_num = 2;
1860 uint32_t always_on_cu_num;
1862 uint32_t mask, cu_bitmap, counter;
1864 if (adev->flags & AMD_IS_APU)
1865 always_on_cu_num = 4;
1866 else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1867 always_on_cu_num = 8;
1869 always_on_cu_num = 12;
1871 mutex_lock(&adev->grbm_idx_mutex);
1872 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1873 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1877 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1880 if (cu_info->bitmap[i][j] & mask) {
1881 if (counter == pg_always_on_cu_num)
1882 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1883 if (counter < always_on_cu_num)
1892 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1893 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1896 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1897 mutex_unlock(&adev->grbm_idx_mutex);
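/*
 * The always-on CU mask computed above feeds the load-balancing-per-watt
 * (LBPW) setup below: per SE/SH the first always_on_cu_num CUs found in the
 * CU bitmap stay active (4 on APUs, 8 on Vega12, 12 otherwise), and the
 * first pg_always_on_cu_num of them are also written to
 * RLC_PG_ALWAYS_ON_CU_MASK.
 */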
1900 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1904 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1905 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1906 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1907 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1908 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1910 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1911 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1913 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1914 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1916 mutex_lock(&adev->grbm_idx_mutex);
1917 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1918 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1919 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1921 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1922 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1923 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1924 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1925 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1927 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1928 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1931 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1934 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1935 * programmed in gfx_v9_0_init_always_on_cu_mask()
1938 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1939 * but is used here for the RLC_LB_CNTL configuration */
1940 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1941 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1942 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1943 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1944 mutex_unlock(&adev->grbm_idx_mutex);
1946 gfx_v9_0_init_always_on_cu_mask(adev);
1949 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1953 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1954 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1955 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1956 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1957 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1959 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1960 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1962 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1963 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1965 mutex_lock(&adev->grbm_idx_mutex);
1966 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1967 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1968 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1970 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1971 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1972 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1973 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1974 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1976 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1977 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1980 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1983 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1984 * programmed in gfx_v9_0_init_always_on_cu_mask()
1987 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1988 * but is used here for the RLC_LB_CNTL configuration */
1989 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1990 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1991 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1992 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1993 mutex_unlock(&adev->grbm_idx_mutex);
1995 gfx_v9_0_init_always_on_cu_mask(adev);
1998 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
2000 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
2003 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
2005 if (gfx_v9_0_load_mec2_fw_bin_support(adev))
2011 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
2013 const struct cs_section_def *cs_data;
2016 adev->gfx.rlc.cs_data = gfx9_cs_data;
2018 cs_data = adev->gfx.rlc.cs_data;
2021 /* init clear state block */
2022 r = amdgpu_gfx_rlc_init_csb(adev);
2027 if (adev->flags & AMD_IS_APU) {
2028 /* TODO: double check the cp_table_size for RV */
2029 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
2030 r = amdgpu_gfx_rlc_init_cpt(adev);
2035 switch (adev->ip_versions[GC_HWIP][0]) {
2036 case IP_VERSION(9, 2, 2):
2037 case IP_VERSION(9, 1, 0):
2038 gfx_v9_0_init_lbpw(adev);
2040 case IP_VERSION(9, 4, 0):
2041 gfx_v9_4_init_lbpw(adev);
2047 /* init spm vmid with 0xf */
2048 if (adev->gfx.rlc.funcs->update_spm_vmid)
2049 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
2054 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
2056 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
2057 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
2060 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
2064 const __le32 *fw_data;
2067 size_t mec_hpd_size;
2069 const struct gfx_firmware_header_v1_0 *mec_hdr;
2071 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2073 /* take ownership of the relevant compute queues */
2074 amdgpu_gfx_compute_queue_acquire(adev);
2075 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2077 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2078 AMDGPU_GEM_DOMAIN_VRAM,
2079 &adev->gfx.mec.hpd_eop_obj,
2080 &adev->gfx.mec.hpd_eop_gpu_addr,
2083 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
2084 gfx_v9_0_mec_fini(adev);
2088 memset(hpd, 0, mec_hpd_size);
2090 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2091 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2094 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2096 fw_data = (const __le32 *)
2097 (adev->gfx.mec_fw->data +
2098 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2099 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2101 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
2102 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2103 &adev->gfx.mec.mec_fw_obj,
2104 &adev->gfx.mec.mec_fw_gpu_addr,
2107 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2108 gfx_v9_0_mec_fini(adev);
2112 memcpy(fw, fw_data, fw_size);
2114 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2115 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
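/*
 * mec_init() thus leaves two buffer objects behind: a VRAM BO providing one
 * GFX9_MEC_HPD_SIZE (4 KiB) EOP area per acquired compute ring, and a GTT BO
 * holding a copy of the MEC ucode for the CP to fetch.
 */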
2120 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2122 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2123 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2124 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2125 (address << SQ_IND_INDEX__INDEX__SHIFT) |
2126 (SQ_IND_INDEX__FORCE_READ_MASK));
2127 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2130 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2131 uint32_t wave, uint32_t thread,
2132 uint32_t regno, uint32_t num, uint32_t *out)
2134 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2135 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2136 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2137 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2138 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2139 (SQ_IND_INDEX__FORCE_READ_MASK) |
2140 (SQ_IND_INDEX__AUTO_INCR_MASK));
2142 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
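/*
 * Wave state is read through the SQ indexed-register interface: SQ_IND_INDEX
 * selects the wave/SIMD and register offset (FORCE_READ for a snapshot,
 * AUTO_INCR for bulk GPR dumps) and the value comes back through SQ_IND_DATA.
 * Typical use (sketch):
 *
 *	uint32_t status = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
 */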
2145 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2147 /* type 1 wave data */
2148 dst[(*no_fields)++] = 1;
2149 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2150 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2151 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2152 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2153 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2154 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2155 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2156 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2157 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2158 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2159 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2160 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2161 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2162 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2163 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2166 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2167 uint32_t wave, uint32_t start,
2168 uint32_t size, uint32_t *dst)
2171 adev, simd, wave, 0,
2172 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2175 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2176 uint32_t wave, uint32_t thread,
2177 uint32_t start, uint32_t size,
2181 adev, simd, wave, thread,
2182 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2185 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2186 u32 me, u32 pipe, u32 q, u32 vm)
2188 soc15_grbm_select(adev, me, pipe, q, vm);
2191 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2192 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2193 .select_se_sh = &gfx_v9_0_select_se_sh,
2194 .read_wave_data = &gfx_v9_0_read_wave_data,
2195 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2196 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2197 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2200 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2201 .ras_late_init = amdgpu_gfx_ras_late_init,
2202 .ras_fini = amdgpu_gfx_ras_fini,
2203 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2204 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2205 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2208 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2213 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2215 switch (adev->ip_versions[GC_HWIP][0]) {
2216 case IP_VERSION(9, 0, 1):
2217 adev->gfx.config.max_hw_contexts = 8;
2218 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2219 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2220 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2221 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2222 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2224 case IP_VERSION(9, 2, 1):
2225 adev->gfx.config.max_hw_contexts = 8;
2226 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2227 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2228 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2229 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2230 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2231 DRM_INFO("fix gfx.config for vega12\n");
2233 case IP_VERSION(9, 4, 0):
2234 adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2235 adev->gfx.config.max_hw_contexts = 8;
2236 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2237 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2238 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2239 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2240 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2241 gb_addr_config &= ~0xf3e777ff;
2242 gb_addr_config |= 0x22014042;
2243 /* check vbios table if gpu info is not available */
2244 err = amdgpu_atomfirmware_get_gfx_info(adev);
2248 case IP_VERSION(9, 2, 2):
2249 case IP_VERSION(9, 1, 0):
2250 adev->gfx.config.max_hw_contexts = 8;
2251 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2252 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2253 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2254 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2255 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2256 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2258 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2260 case IP_VERSION(9, 4, 1):
2261 adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2262 adev->gfx.config.max_hw_contexts = 8;
2263 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2264 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2265 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2266 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2267 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2268 gb_addr_config &= ~0xf3e777ff;
2269 gb_addr_config |= 0x22014042;
2271 case IP_VERSION(9, 3, 0):
2272 adev->gfx.config.max_hw_contexts = 8;
2273 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2274 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2275 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2276 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2277 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2278 gb_addr_config &= ~0xf3e777ff;
2279 gb_addr_config |= 0x22010042;
2281 case IP_VERSION(9, 4, 2):
2282 adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2283 adev->gfx.config.max_hw_contexts = 8;
2284 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2285 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2286 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2287 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2288 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2289 gb_addr_config &= ~0xf3e777ff;
2290 gb_addr_config |= 0x22014042;
2291 /* check vbios table if gpu info is not available */
2292 err = amdgpu_atomfirmware_get_gfx_info(adev);
2301 adev->gfx.config.gb_addr_config = gb_addr_config;
2303 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2305 adev->gfx.config.gb_addr_config,
2309 adev->gfx.config.max_tile_pipes =
2310 adev->gfx.config.gb_addr_config_fields.num_pipes;
2312 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2314 adev->gfx.config.gb_addr_config,
2317 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2319 adev->gfx.config.gb_addr_config,
2321 MAX_COMPRESSED_FRAGS);
2322 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2324 adev->gfx.config.gb_addr_config,
2327 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2329 adev->gfx.config.gb_addr_config,
2331 NUM_SHADER_ENGINES);
2332 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2334 adev->gfx.config.gb_addr_config,
2336 PIPE_INTERLEAVE_SIZE));
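/*
 * The GB_ADDR_CONFIG fields decoded above are log2 values, so the counts are
 * 1 << field (e.g. a NUM_PIPES field of 2 means 4 pipes); the pipe
 * interleave size carries an extra +8 bias, i.e. a field value of 0
 * corresponds to a 256-byte interleave.
 */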
2341 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2342 int mec, int pipe, int queue)
2345 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2346 unsigned int hw_prio;
2348 ring = &adev->gfx.compute_ring[ring_id];
2353 ring->queue = queue;
2355 ring->ring_obj = NULL;
2356 ring->use_doorbell = true;
2357 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2358 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2359 + (ring_id * GFX9_MEC_HPD_SIZE);
2360 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2362 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2363 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2365 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2366 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2367 /* type-2 packets are deprecated on MEC, use type-3 instead */
2368 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
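/*
 * Each compute ring therefore gets its own EOP slot in the HPD buffer, an
 * EOP interrupt source derived from its MEC/pipe pair, and a doorbell at
 * mec_ring0 + ring_id; the left shift by one converts the doorbell index
 * (assigned in 64-bit doorbell units) into the dword-based offset used by
 * the ring.
 */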
2372 static int gfx_v9_0_sw_init(void *handle)
2374 int i, j, k, r, ring_id;
2375 struct amdgpu_ring *ring;
2376 struct amdgpu_kiq *kiq;
2377 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2379 switch (adev->ip_versions[GC_HWIP][0]) {
2380 case IP_VERSION(9, 0, 1):
2381 case IP_VERSION(9, 2, 1):
2382 case IP_VERSION(9, 4, 0):
2383 case IP_VERSION(9, 2, 2):
2384 case IP_VERSION(9, 1, 0):
2385 case IP_VERSION(9, 4, 1):
2386 case IP_VERSION(9, 3, 0):
2387 case IP_VERSION(9, 4, 2):
2388 adev->gfx.mec.num_mec = 2;
2391 adev->gfx.mec.num_mec = 1;
2395 adev->gfx.mec.num_pipe_per_mec = 4;
2396 adev->gfx.mec.num_queue_per_pipe = 8;
2399 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2403 /* Privileged reg */
2404 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2405 &adev->gfx.priv_reg_irq);
2409 /* Privileged inst */
2410 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2411 &adev->gfx.priv_inst_irq);
2416 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2417 &adev->gfx.cp_ecc_error_irq);
2422 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2423 &adev->gfx.cp_ecc_error_irq);
2427 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2429 gfx_v9_0_scratch_init(adev);
2431 r = gfx_v9_0_init_microcode(adev);
2433 DRM_ERROR("Failed to load gfx firmware!\n");
2437 r = adev->gfx.rlc.funcs->init(adev);
2439 DRM_ERROR("Failed to init rlc BOs!\n");
2443 r = gfx_v9_0_mec_init(adev);
2445 DRM_ERROR("Failed to init MEC BOs!\n");
2449 /* set up the gfx ring */
2450 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2451 ring = &adev->gfx.gfx_ring[i];
2452 ring->ring_obj = NULL;
2454 sprintf(ring->name, "gfx");
2456 sprintf(ring->name, "gfx_%d", i);
2457 ring->use_doorbell = true;
2458 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2459 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2460 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2461 AMDGPU_RING_PRIO_DEFAULT, NULL);
2466 /* set up the compute queues - allocate horizontally across pipes */
2468 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2469 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2470 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2471 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2474 r = gfx_v9_0_compute_ring_init(adev,
2485 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2487 DRM_ERROR("Failed to init KIQ BOs!\n");
2491 kiq = &adev->gfx.kiq;
2492 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2496 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2497 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2501 adev->gfx.ce_ram_size = 0x8000;
2503 r = gfx_v9_0_gpu_early_init(adev);
2511 static int gfx_v9_0_sw_fini(void *handle)
2514 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2516 if (adev->gfx.ras_funcs &&
2517 adev->gfx.ras_funcs->ras_fini)
2518 adev->gfx.ras_funcs->ras_fini(adev);
2520 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2521 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2522 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2523 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2525 amdgpu_gfx_mqd_sw_fini(adev);
2526 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2527 amdgpu_gfx_kiq_fini(adev);
2529 gfx_v9_0_mec_fini(adev);
2530 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2531 &adev->gfx.rlc.clear_state_gpu_addr,
2532 (void **)&adev->gfx.rlc.cs_ptr);
2533 if (adev->flags & AMD_IS_APU) {
2534 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2535 &adev->gfx.rlc.cp_table_gpu_addr,
2536 (void **)&adev->gfx.rlc.cp_table_ptr);
2538 gfx_v9_0_free_microcode(adev);
2544 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2549 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2554 if (instance == 0xffffffff)
2555 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2557 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2559 if (se_num == 0xffffffff)
2560 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2562 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2564 if (sh_num == 0xffffffff)
2565 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2567 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2569 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2572 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2576 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2577 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2579 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2580 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2582 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2583 adev->gfx.config.max_sh_per_se);
2585 return (~data) & mask;
2588 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2593 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2594 adev->gfx.config.max_sh_per_se;
2596 mutex_lock(&adev->grbm_idx_mutex);
2597 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2598 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2599 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2600 data = gfx_v9_0_get_rb_active_bitmap(adev);
2601 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2602 rb_bitmap_width_per_sh);
2605 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2606 mutex_unlock(&adev->grbm_idx_mutex);
2608 adev->gfx.config.backend_enable_mask = active_rbs;
2609 adev->gfx.config.num_rbs = hweight32(active_rbs);
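/*
 * setup_rb() derives the active render-backend mask per SE/SH: the disable
 * bits from CC_RB_BACKEND_DISABLE and GC_USER_RB_BACKEND_DISABLE are OR'ed,
 * inverted and trimmed to max_backends_per_se / max_sh_per_se bits, then the
 * per-SH results are packed into one global bitmap (rb_bitmap_width_per_sh
 * bits per SH) and the total RB count is taken from its population count.
 */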
2612 #define DEFAULT_SH_MEM_BASES (0x6000)
2613 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2616 uint32_t sh_mem_config;
2617 uint32_t sh_mem_bases;
2620 * Configure apertures:
2621 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2622 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2623 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2625 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2627 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2628 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2629 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2631 mutex_lock(&adev->srbm_mutex);
2632 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2633 soc15_grbm_select(adev, 0, 0, 0, i);
2634 /* CP and shaders */
2635 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2636 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2638 soc15_grbm_select(adev, 0, 0, 0, 0);
2639 mutex_unlock(&adev->srbm_mutex);
2641 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2642 access. These should be enabled by FW for target VMIDs. */
2643 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2644 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2645 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2646 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2647 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2651 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2656 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2657 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2658 * the driver can enable them for graphics. VMID0 should maintain
2659 * access so that HWS firmware can save/restore entries.
2661 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2662 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2663 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2664 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2665 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2669 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2673 switch (adev->ip_versions[GC_HWIP][0]) {
2674 case IP_VERSION(9, 4, 1):
2675 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2676 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2677 DISABLE_BARRIER_WAITCNT, 1);
2678 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2685 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2690 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2692 gfx_v9_0_tiling_mode_table_init(adev);
2694 gfx_v9_0_setup_rb(adev);
2695 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2696 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2698 /* XXX SH_MEM regs */
2699 /* where to put LDS, scratch, GPUVM in FSA64 space */
2700 mutex_lock(&adev->srbm_mutex);
2701 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2702 soc15_grbm_select(adev, 0, 0, 0, i);
2703 /* CP and shaders */
2705 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2706 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2707 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2708 !!adev->gmc.noretry);
2709 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2710 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2712 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2713 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2714 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2715 !!adev->gmc.noretry);
2716 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2717 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2718 (adev->gmc.private_aperture_start >> 48));
2719 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2720 (adev->gmc.shared_aperture_start >> 48));
2721 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2724 soc15_grbm_select(adev, 0, 0, 0, 0);
2726 mutex_unlock(&adev->srbm_mutex);
2728 gfx_v9_0_init_compute_vmid(adev);
2729 gfx_v9_0_init_gds_vmid(adev);
2730 gfx_v9_0_init_sq_config(adev);
2733 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2738 mutex_lock(&adev->grbm_idx_mutex);
2739 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2740 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2741 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2742 for (k = 0; k < adev->usec_timeout; k++) {
2743 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2747 if (k == adev->usec_timeout) {
2748 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2749 0xffffffff, 0xffffffff);
2750 mutex_unlock(&adev->grbm_idx_mutex);
2751 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2757 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2758 mutex_unlock(&adev->grbm_idx_mutex);
2760 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2761 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2762 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2763 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2764 for (k = 0; k < adev->usec_timeout; k++) {
2765 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2771 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2776 /* These interrupts should be enabled to drive DS clock */
2778 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2780 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2781 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2782 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2783 if(adev->gfx.num_gfx_rings)
2784 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2786 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2789 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2791 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2793 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2794 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2795 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2796 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2797 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2798 adev->gfx.rlc.clear_state_size);
2801 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2802 int indirect_offset,
2804 int *unique_indirect_regs,
2805 int unique_indirect_reg_count,
2806 int *indirect_start_offsets,
2807 int *indirect_start_offsets_count,
2808 int max_start_offsets_count)
2812 for (; indirect_offset < list_size; indirect_offset++) {
2813 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2814 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2815 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2817 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2818 indirect_offset += 2;
2820 /* look for the matching index */
2821 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2822 if (unique_indirect_regs[idx] ==
2823 register_list_format[indirect_offset] ||
2824 !unique_indirect_regs[idx])
2828 BUG_ON(idx >= unique_indirect_reg_count);
2830 if (!unique_indirect_regs[idx])
2831 unique_indirect_regs[idx] = register_list_format[indirect_offset];
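/*
 * The register list format produced by the RLC firmware begins with a run of
 * direct registers followed by indirect blocks, each terminated by a
 * 0xFFFFFFFF marker; the parser above records where every indirect block
 * starts and collects the unique indirect registers it references so that
 * gfx_v9_1_init_rlc_save_restore_list() can program them into the SRM
 * index/data registers.
 */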
2838 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2840 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2841 int unique_indirect_reg_count = 0;
2843 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2844 int indirect_start_offsets_count = 0;
2850 u32 *register_list_format =
2851 kmemdup(adev->gfx.rlc.register_list_format,
2852 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2853 if (!register_list_format)
2856 /* setup unique_indirect_regs array and indirect_start_offsets array */
2857 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2858 gfx_v9_1_parse_ind_reg_list(register_list_format,
2859 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2860 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2861 unique_indirect_regs,
2862 unique_indirect_reg_count,
2863 indirect_start_offsets,
2864 &indirect_start_offsets_count,
2865 ARRAY_SIZE(indirect_start_offsets));
2867 /* enable auto inc in case it is disabled */
2868 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2869 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2870 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2872 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2873 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2874 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2875 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2876 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2877 adev->gfx.rlc.register_restore[i]);
2879 /* load indirect register */
2880 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2881 adev->gfx.rlc.reg_list_format_start);
2883 /* direct register portion */
2884 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2885 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2886 register_list_format[i]);
2888 /* indirect register portion */
2889 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2890 if (register_list_format[i] == 0xFFFFFFFF) {
2891 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2895 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2896 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2898 for (j = 0; j < unique_indirect_reg_count; j++) {
2899 if (register_list_format[i] == unique_indirect_regs[j]) {
2900 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2905 BUG_ON(j >= unique_indirect_reg_count);
2910 /* set save/restore list size */
2911 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2912 list_size = list_size >> 1;
2913 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2914 adev->gfx.rlc.reg_restore_list_size);
2915 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2917 /* write the starting offsets to RLC scratch ram */
2918 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2919 adev->gfx.rlc.starting_offsets_start);
2920 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2921 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2922 indirect_start_offsets[i]);
2924 /* load unique indirect regs */
2925 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2926 if (unique_indirect_regs[i] != 0) {
2927 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2928 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2929 unique_indirect_regs[i] & 0x3FFFF);
2931 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2932 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2933 unique_indirect_regs[i] >> 20);
2937 kfree(register_list_format);
2941 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2943 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2946 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2950 uint32_t default_data = 0;
2952 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2954 /* enable GFXIP control over CGPG */
2955 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2956 if(default_data != data)
2957 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2960 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2961 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2962 if(default_data != data)
2963 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2965 /* restore GFXIP control over CGPG */
2966 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2967 if(default_data != data)
2968 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2972 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2976 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2977 AMD_PG_SUPPORT_GFX_SMG |
2978 AMD_PG_SUPPORT_GFX_DMG)) {
2979 /* init IDLE_POLL_COUNT = 60 */
2980 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2981 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2982 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2983 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2985 /* init RLC PG Delay */
2987 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2988 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2989 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2990 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2991 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2993 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2994 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2995 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2996 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2998 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2999 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
3000 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
3001 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
3003 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
3004 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3006 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
3007 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3008 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
3009 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
3010 pwr_10_0_gfxip_control_over_cgpg(adev, true);
3014 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3018 uint32_t default_data = 0;
3020 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3021 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3022 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
3024 if (default_data != data)
3025 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3028 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3032 uint32_t default_data = 0;
3034 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3035 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3036 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
3038 if(default_data != data)
3039 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3042 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
3046 uint32_t default_data = 0;
3048 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3049 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3052 if(default_data != data)
3053 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3056 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
3059 uint32_t data, default_data;
3061 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3062 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3063 GFX_POWER_GATING_ENABLE,
3065 if(default_data != data)
3066 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3069 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3072 uint32_t data, default_data;
3074 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3075 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3076 GFX_PIPELINE_PG_ENABLE,
3078 if(default_data != data)
3079 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3082 /* read any GFX register to wake up GFX */
3083 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3086 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3089 uint32_t data, default_data;
3091 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3092 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3093 STATIC_PER_CU_PG_ENABLE,
3095 if(default_data != data)
3096 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3099 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3102 uint32_t data, default_data;
3104 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3105 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3106 DYN_PER_CU_PG_ENABLE,
3108 if(default_data != data)
3109 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3112 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3114 gfx_v9_0_init_csb(adev);
3117 * The RLC save/restore list has been supported since RLC v2_1
3118 * and is required by the gfxoff feature.
3120 if (adev->gfx.rlc.is_rlc_v2_1) {
3121 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3122 (adev->apu_flags & AMD_APU_IS_RAVEN2))
3123 gfx_v9_1_init_rlc_save_restore_list(adev);
3124 gfx_v9_0_enable_save_restore_machine(adev);
3127 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3128 AMD_PG_SUPPORT_GFX_SMG |
3129 AMD_PG_SUPPORT_GFX_DMG |
3131 AMD_PG_SUPPORT_GDS |
3132 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3133 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3134 adev->gfx.rlc.cp_table_gpu_addr >> 8);
3135 gfx_v9_0_init_gfx_power_gating(adev);
3139 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3141 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3142 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3143 gfx_v9_0_wait_for_rlc_serdes(adev);
3146 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3148 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3150 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3154 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3156 #ifdef AMDGPU_RLC_DEBUG_RETRY
3160 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3163 /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized, so do it here for dGPUs only */
3164 if (!(adev->flags & AMD_IS_APU)) {
3165 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3169 #ifdef AMDGPU_RLC_DEBUG_RETRY
3170 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3171 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3172 if (rlc_ucode_ver == 0x108) {
3173 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3174 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3175 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3176 * default is 0x9C4 to create a 100us interval */
3177 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3178 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3179 * to disable the page fault retry interrupts; the default is 0x100 */
3181 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3186 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3188 const struct rlc_firmware_header_v2_0 *hdr;
3189 const __le32 *fw_data;
3190 unsigned i, fw_size;
3192 if (!adev->gfx.rlc_fw)
3195 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3196 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3198 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3199 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3200 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3202 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3203 RLCG_UCODE_LOADING_START_ADDRESS);
3204 for (i = 0; i < fw_size; i++)
3205 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3206 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
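/*
 * Legacy (non-PSP) RLC loading streams the image through the GPM ucode
 * window: point RLC_GPM_UCODE_ADDR at the load address, write each dword to
 * RLC_GPM_UCODE_DATA, then store the ucode version back into the address
 * register. For example (sketch):
 *
 *	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, RLCG_UCODE_LOADING_START_ADDRESS);
 *	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data));
 */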
3211 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3215 if (amdgpu_sriov_vf(adev)) {
3216 gfx_v9_0_init_csb(adev);
3220 adev->gfx.rlc.funcs->stop(adev);
3223 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3225 gfx_v9_0_init_pg(adev);
3227 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3228 /* legacy rlc firmware loading */
3229 r = gfx_v9_0_rlc_load_microcode(adev);
3234 switch (adev->ip_versions[GC_HWIP][0]) {
3235 case IP_VERSION(9, 2, 2):
3236 case IP_VERSION(9, 1, 0):
3237 if (amdgpu_lbpw == 0)
3238 gfx_v9_0_enable_lbpw(adev, false);
3240 gfx_v9_0_enable_lbpw(adev, true);
3242 case IP_VERSION(9, 4, 0):
3243 if (amdgpu_lbpw > 0)
3244 gfx_v9_0_enable_lbpw(adev, true);
3246 gfx_v9_0_enable_lbpw(adev, false);
3252 adev->gfx.rlc.funcs->start(adev);
3257 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3259 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3261 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3262 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3263 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3264 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3268 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3270 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3271 const struct gfx_firmware_header_v1_0 *ce_hdr;
3272 const struct gfx_firmware_header_v1_0 *me_hdr;
3273 const __le32 *fw_data;
3274 unsigned i, fw_size;
3276 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3279 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3280 adev->gfx.pfp_fw->data;
3281 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3282 adev->gfx.ce_fw->data;
3283 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3284 adev->gfx.me_fw->data;
3286 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3287 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3288 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3290 gfx_v9_0_cp_gfx_enable(adev, false);
3293 fw_data = (const __le32 *)
3294 (adev->gfx.pfp_fw->data +
3295 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3296 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3297 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3298 for (i = 0; i < fw_size; i++)
3299 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3300 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3303 fw_data = (const __le32 *)
3304 (adev->gfx.ce_fw->data +
3305 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3306 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3307 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3308 for (i = 0; i < fw_size; i++)
3309 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3310 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3313 fw_data = (const __le32 *)
3314 (adev->gfx.me_fw->data +
3315 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3316 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3317 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3318 for (i = 0; i < fw_size; i++)
3319 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3320 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3325 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3327 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3328 const struct cs_section_def *sect = NULL;
3329 const struct cs_extent_def *ext = NULL;
3333 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3334 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3336 gfx_v9_0_cp_gfx_enable(adev, true);
3338 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3340 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3344 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3345 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3347 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3348 amdgpu_ring_write(ring, 0x80000000);
3349 amdgpu_ring_write(ring, 0x80000000);
3351 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3352 for (ext = sect->section; ext->extent != NULL; ++ext) {
3353 if (sect->id == SECT_CONTEXT) {
3354 amdgpu_ring_write(ring,
3355 PACKET3(PACKET3_SET_CONTEXT_REG,
3357 amdgpu_ring_write(ring,
3358 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3359 for (i = 0; i < ext->reg_count; i++)
3360 amdgpu_ring_write(ring, ext->extent[i]);
3365 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3366 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3368 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3369 amdgpu_ring_write(ring, 0);
3371 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3372 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3373 amdgpu_ring_write(ring, 0x8000);
3374 amdgpu_ring_write(ring, 0x8000);
3376 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3377 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3378 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3379 amdgpu_ring_write(ring, tmp);
3380 amdgpu_ring_write(ring, 0);
3382 amdgpu_ring_commit(ring);
3387 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3389 struct amdgpu_ring *ring;
3392 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3394 /* Set the write pointer delay */
3395 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3397 /* set the RB to use vmid 0 */
3398 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3400 /* Set ring buffer size */
3401 ring = &adev->gfx.gfx_ring[0];
3402 rb_bufsz = order_base_2(ring->ring_size / 8);
3403 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3404 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3406 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3408 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3410 /* Initialize the ring buffer's write pointers */
3412 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3413 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3415 /* set the wb address whether it's enabled or not */
3416 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3417 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3418 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3420 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3421 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3422 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3425 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3427 rb_addr = ring->gpu_addr >> 8;
3428 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3429 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3431 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3432 if (ring->use_doorbell) {
3433 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3434 DOORBELL_OFFSET, ring->doorbell_index);
3435 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3438 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3440 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3442 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3443 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3444 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3446 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3447 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3450 /* start the ring */
3451 gfx_v9_0_cp_gfx_start(adev);
3452 ring->sched.ready = true;
3457 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3460 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3462 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3463 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3464 adev->gfx.kiq.ring.sched.ready = false;
3469 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3471 const struct gfx_firmware_header_v1_0 *mec_hdr;
3472 const __le32 *fw_data;
3476 if (!adev->gfx.mec_fw)
3479 gfx_v9_0_cp_compute_enable(adev, false);
3481 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3482 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3484 fw_data = (const __le32 *)
3485 (adev->gfx.mec_fw->data +
3486 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3488 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3489 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3490 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3492 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3493 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3494 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3495 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3498 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3499 mec_hdr->jt_offset);
3500 for (i = 0; i < mec_hdr->jt_size; i++)
3501 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3502 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3504 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3505 adev->gfx.mec_fw_version);
3506 /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3512 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3515 struct amdgpu_device *adev = ring->adev;
3517 /* tell RLC which queue is the KIQ queue */
3518 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
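/* the KIQ is identified to the RLC as (me << 5) | (pipe << 3) | queue,
 * matching the shifts below */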
3520 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3521 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3523 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3526 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3528 struct amdgpu_device *adev = ring->adev;
3530 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3531 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3532 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3533 mqd->cp_hqd_queue_priority =
3534 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3539 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3541 struct amdgpu_device *adev = ring->adev;
3542 struct v9_mqd *mqd = ring->mqd_ptr;
3543 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3546 mqd->header = 0xC0310800;
3547 mqd->compute_pipelinestat_enable = 0x00000001;
3548 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3549 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3550 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3551 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3552 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3553 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3554 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3555 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3556 mqd->compute_misc_reserved = 0x00000003;
3558 mqd->dynamic_cu_mask_addr_lo =
3559 lower_32_bits(ring->mqd_gpu_addr
3560 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3561 mqd->dynamic_cu_mask_addr_hi =
3562 upper_32_bits(ring->mqd_gpu_addr
3563 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3565 eop_base_addr = ring->eop_gpu_addr >> 8;
3566 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3567 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
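/* the EOP base address registers take the address in 256-byte units,
 * hence the >> 8 above */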
3569 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
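/* order_base_2(GFX9_MEC_HPD_SIZE / 4) is the log2 of the EOP buffer size in
 * dwords; subtracting 1 gives the value N for which 2^(N+1) dwords equals
 * that size, which is the encoding this field expects */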
3570 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3571 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3572 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3574 mqd->cp_hqd_eop_control = tmp;
3576 /* enable doorbell? */
3577 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3579 if (ring->use_doorbell) {
3580 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3581 DOORBELL_OFFSET, ring->doorbell_index);
3582 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3584 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3585 DOORBELL_SOURCE, 0);
3586 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3589 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3593 mqd->cp_hqd_pq_doorbell_control = tmp;
3595 /* disable the queue if it's active */
3597 mqd->cp_hqd_dequeue_request = 0;
3598 mqd->cp_hqd_pq_rptr = 0;
3599 mqd->cp_hqd_pq_wptr_lo = 0;
3600 mqd->cp_hqd_pq_wptr_hi = 0;
3602 /* set the pointer to the MQD */
3603 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3604 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3606 /* set MQD vmid to 0 */
3607 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3608 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3609 mqd->cp_mqd_control = tmp;
3611 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3612 hqd_gpu_addr = ring->gpu_addr >> 8;
3613 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3614 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3616 /* set up the HQD, this is similar to CP_RB0_CNTL */
3617 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3618 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3619 (order_base_2(ring->ring_size / 4) - 1));
3620 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3621 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3623 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3625 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3626 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3627 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3628 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3629 mqd->cp_hqd_pq_control = tmp;
3631 /* set the wb address whether it's enabled or not */
3632 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3633 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3634 mqd->cp_hqd_pq_rptr_report_addr_hi =
3635 upper_32_bits(wb_gpu_addr) & 0xffff;
3637 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3638 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3639 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3640 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3643 /* enable the doorbell if requested */
3644 if (ring->use_doorbell) {
3645 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3646 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3647 DOORBELL_OFFSET, ring->doorbell_index);
3649 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3651 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3652 DOORBELL_SOURCE, 0);
3653 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3657 mqd->cp_hqd_pq_doorbell_control = tmp;
3659 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3661 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3663 /* set the vmid for the queue */
3664 mqd->cp_hqd_vmid = 0;
3666 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3667 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3668 mqd->cp_hqd_persistent_state = tmp;
3670 /* set MIN_IB_AVAIL_SIZE */
3671 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3672 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3673 mqd->cp_hqd_ib_control = tmp;
3675 /* set static priority for a queue/ring */
3676 gfx_v9_0_mqd_set_priority(ring, mqd);
3677 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3679 /* the map_queues packet doesn't need to activate the queue,
3680 * so only the kiq needs to set this field. */
3682 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3683 mqd->cp_hqd_active = 1;
3688 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3690 struct amdgpu_device *adev = ring->adev;
3691 struct v9_mqd *mqd = ring->mqd_ptr;
3694 /* disable wptr polling */
3695 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3697 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3698 mqd->cp_hqd_eop_base_addr_lo);
3699 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3700 mqd->cp_hqd_eop_base_addr_hi);
3702 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3703 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3704 mqd->cp_hqd_eop_control);
3706 /* enable doorbell? */
3707 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3708 mqd->cp_hqd_pq_doorbell_control);
3710 /* disable the queue if it's active */
3711 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3712 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3713 for (j = 0; j < adev->usec_timeout; j++) {
3714 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3718 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3719 mqd->cp_hqd_dequeue_request);
3720 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3721 mqd->cp_hqd_pq_rptr);
3722 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3723 mqd->cp_hqd_pq_wptr_lo);
3724 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3725 mqd->cp_hqd_pq_wptr_hi);
3728 /* set the pointer to the MQD */
3729 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3730 mqd->cp_mqd_base_addr_lo);
3731 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3732 mqd->cp_mqd_base_addr_hi);
3734 /* set MQD vmid to 0 */
3735 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3736 mqd->cp_mqd_control);
3738 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3739 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3740 mqd->cp_hqd_pq_base_lo);
3741 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3742 mqd->cp_hqd_pq_base_hi);
3744 /* set up the HQD, this is similar to CP_RB0_CNTL */
3745 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3746 mqd->cp_hqd_pq_control);
3748 /* set the wb address whether it's enabled or not */
3749 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3750 mqd->cp_hqd_pq_rptr_report_addr_lo);
3751 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3752 mqd->cp_hqd_pq_rptr_report_addr_hi);
3754 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3755 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3756 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3757 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3758 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3760 /* enable the doorbell if requested */
3761 if (ring->use_doorbell) {
3762 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3763 (adev->doorbell_index.kiq * 2) << 2);
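/* the KIQ doorbell index is converted to the byte offset the range registers
 * expect (the extra * 2 reflects the 64-bit doorbell slots used here) */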
3764 /* If GC has entered CGPG, ringing a doorbell beyond the first page
3765 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3766 * around this issue; this change has to align with the firmware update. */
3769 if (check_if_enlarge_doorbell_range(adev))
3770 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3771 (adev->doorbell.size - 4));
3773 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3774 (adev->doorbell_index.userqueue_end * 2) << 2);
3777 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3778 mqd->cp_hqd_pq_doorbell_control);
3780 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3781 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3782 mqd->cp_hqd_pq_wptr_lo);
3783 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3784 mqd->cp_hqd_pq_wptr_hi);
3786 /* set the vmid for the queue */
3787 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3789 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3790 mqd->cp_hqd_persistent_state);
3792 /* activate the queue */
3793 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3794 mqd->cp_hqd_active);
3796 if (ring->use_doorbell)
3797 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3802 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3804 struct amdgpu_device *adev = ring->adev;
3807 /* disable the queue if it's active */
3808 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3810 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3812 for (j = 0; j < adev->usec_timeout; j++) {
3813 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3818 if (j == adev->usec_timeout) {
3819 DRM_DEBUG("KIQ dequeue request failed.\n");
3821 /* Manual disable if dequeue request times out */
3822 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3825 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3829 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3830 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3831 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3832 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3833 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3834 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3835 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3836 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3841 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3843 struct amdgpu_device *adev = ring->adev;
3844 struct v9_mqd *mqd = ring->mqd_ptr;
3845 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3846 struct v9_mqd *tmp_mqd;
3848 gfx_v9_0_kiq_setting(ring);
3850 /* The GPU could be in a bad state during probe: the driver triggers the reset
3851 * after loading the SMU, and in that case the mqd has not been initialized,
3852 * so the driver needs to re-init the mqd.
3853 * Check mqd->cp_hqd_pq_control since this value should not be 0. */
3855 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3856 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3857 /* for the GPU_RESET case, reset the MQD to a clean status */
3858 if (adev->gfx.mec.mqd_backup[mqd_idx])
3859 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3861 /* reset ring buffer */
3863 amdgpu_ring_clear_ring(ring);
3865 mutex_lock(&adev->srbm_mutex);
3866 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3867 gfx_v9_0_kiq_init_register(ring);
3868 soc15_grbm_select(adev, 0, 0, 0, 0);
3869 mutex_unlock(&adev->srbm_mutex);
3871 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3872 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3873 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3874 mutex_lock(&adev->srbm_mutex);
3875 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3876 gfx_v9_0_mqd_init(ring);
3877 gfx_v9_0_kiq_init_register(ring);
3878 soc15_grbm_select(adev, 0, 0, 0, 0);
3879 mutex_unlock(&adev->srbm_mutex);
3881 if (adev->gfx.mec.mqd_backup[mqd_idx])
3882 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3888 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3890 struct amdgpu_device *adev = ring->adev;
3891 struct v9_mqd *mqd = ring->mqd_ptr;
3892 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3893 struct v9_mqd *tmp_mqd;
3895 /* Same as the kiq init above: the driver needs to re-init the mqd if
3896 * mqd->cp_hqd_pq_control was not initialized before. */
3898 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3900 if (!tmp_mqd->cp_hqd_pq_control ||
3901 (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3902 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3903 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3904 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3905 mutex_lock(&adev->srbm_mutex);
3906 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3907 gfx_v9_0_mqd_init(ring);
3908 soc15_grbm_select(adev, 0, 0, 0, 0);
3909 mutex_unlock(&adev->srbm_mutex);
3911 if (adev->gfx.mec.mqd_backup[mqd_idx])
3912 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3913 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3914 /* reset MQD to a clean status */
3915 if (adev->gfx.mec.mqd_backup[mqd_idx])
3916 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3918 /* reset ring buffer */
3920 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3921 amdgpu_ring_clear_ring(ring);
3923 amdgpu_ring_clear_ring(ring);
3929 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3931 struct amdgpu_ring *ring;
3934 ring = &adev->gfx.kiq.ring;
3936 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3937 if (unlikely(r != 0))
3940 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3941 if (unlikely(r != 0))
3944 gfx_v9_0_kiq_init_queue(ring);
3945 amdgpu_bo_kunmap(ring->mqd_obj);
3946 ring->mqd_ptr = NULL;
3947 amdgpu_bo_unreserve(ring->mqd_obj);
3948 ring->sched.ready = true;
3952 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3954 struct amdgpu_ring *ring = NULL;
3957 gfx_v9_0_cp_compute_enable(adev, true);
3959 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3960 ring = &adev->gfx.compute_ring[i];
3962 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3963 if (unlikely(r != 0))
3965 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3967 r = gfx_v9_0_kcq_init_queue(ring);
3968 amdgpu_bo_kunmap(ring->mqd_obj);
3969 ring->mqd_ptr = NULL;
3971 amdgpu_bo_unreserve(ring->mqd_obj);
3976 r = amdgpu_gfx_enable_kcq(adev);
3981 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3984 struct amdgpu_ring *ring;
3986 if (!(adev->flags & AMD_IS_APU))
3987 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3989 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3990 if (adev->gfx.num_gfx_rings) {
3991 /* legacy firmware loading */
3992 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3997 r = gfx_v9_0_cp_compute_load_microcode(adev);
4002 r = gfx_v9_0_kiq_resume(adev);
4006 if (adev->gfx.num_gfx_rings) {
4007 r = gfx_v9_0_cp_gfx_resume(adev);
4012 r = gfx_v9_0_kcq_resume(adev);
4016 if (adev->gfx.num_gfx_rings) {
4017 ring = &adev->gfx.gfx_ring[0];
4018 r = amdgpu_ring_test_helper(ring);
4023 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4024 ring = &adev->gfx.compute_ring[i];
4025 amdgpu_ring_test_helper(ring);
4028 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
4033 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
4037 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
4038 adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
4041 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
4042 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
4043 adev->df.hash_status.hash_64k);
4044 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
4045 adev->df.hash_status.hash_2m);
4046 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
4047 adev->df.hash_status.hash_1g);
4048 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
4051 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
4053 if (adev->gfx.num_gfx_rings)
4054 gfx_v9_0_cp_gfx_enable(adev, enable);
4055 gfx_v9_0_cp_compute_enable(adev, enable);
4058 static int gfx_v9_0_hw_init(void *handle)
4061 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4063 if (!amdgpu_sriov_vf(adev))
4064 gfx_v9_0_init_golden_registers(adev);
4066 gfx_v9_0_constants_init(adev);
4068 gfx_v9_0_init_tcp_config(adev);
4070 r = adev->gfx.rlc.funcs->resume(adev);
4074 r = gfx_v9_0_cp_resume(adev);
4078 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4079 gfx_v9_4_2_set_power_brake_sequence(adev);
4084 static int gfx_v9_0_hw_fini(void *handle)
4086 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4088 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4089 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4090 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4092 /* if a RAS fatal error has been triggered, DF freeze and kcq disable would fail */
4093 if (!amdgpu_ras_intr_triggered())
4094 /* disable KCQ to avoid CPC touching memory that is no longer valid */
4095 amdgpu_gfx_disable_kcq(adev);
4097 if (amdgpu_sriov_vf(adev)) {
4098 gfx_v9_0_cp_gfx_enable(adev, false);
4099 /* must disable polling for SRIOV when hw is finished, otherwise the
4100 * CPC engine may keep fetching a WB address that is already invalid
4101 * after sw has finished, and trigger a DMAR read error on the hypervisor side. */
4104 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4108 /* Use the deinitialize sequence from CAIL when unbinding the device from
4109 * the driver, otherwise the KIQ hangs when binding it back. */
4111 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4112 mutex_lock(&adev->srbm_mutex);
4113 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4114 adev->gfx.kiq.ring.pipe,
4115 adev->gfx.kiq.ring.queue, 0);
4116 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4117 soc15_grbm_select(adev, 0, 0, 0, 0);
4118 mutex_unlock(&adev->srbm_mutex);
4121 gfx_v9_0_cp_enable(adev, false);
4123 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4124 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4125 (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4126 dev_dbg(adev->dev, "Skipping RLC halt\n");
4130 adev->gfx.rlc.funcs->stop(adev);
4134 static int gfx_v9_0_suspend(void *handle)
4136 return gfx_v9_0_hw_fini(handle);
4139 static int gfx_v9_0_resume(void *handle)
4141 return gfx_v9_0_hw_init(handle);
4144 static bool gfx_v9_0_is_idle(void *handle)
4146 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4148 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4149 GRBM_STATUS, GUI_ACTIVE))
4155 static int gfx_v9_0_wait_for_idle(void *handle)
4158 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4160 for (i = 0; i < adev->usec_timeout; i++) {
4161 if (gfx_v9_0_is_idle(handle))
4168 static int gfx_v9_0_soft_reset(void *handle)
4170 u32 grbm_soft_reset = 0;
4172 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4175 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4176 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4177 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4178 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4179 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4180 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4181 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4182 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4183 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4184 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4185 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4188 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4189 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4190 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4194 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4195 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4196 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4197 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4200 if (grbm_soft_reset) {
4202 adev->gfx.rlc.funcs->stop(adev);
4204 if (adev->gfx.num_gfx_rings)
4205 /* Disable GFX parsing/prefetching */
4206 gfx_v9_0_cp_gfx_enable(adev, false);
4208 /* Disable MEC parsing/prefetching */
4209 gfx_v9_0_cp_compute_enable(adev, false);
4211 if (grbm_soft_reset) {
4212 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4213 tmp |= grbm_soft_reset;
4214 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4215 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4216 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4220 tmp &= ~grbm_soft_reset;
4221 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4222 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4225 /* Wait a little for things to settle down */
4231 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4233 signed long r, cnt = 0;
4234 unsigned long flags;
4235 uint32_t seq, reg_val_offs = 0;
4237 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4238 struct amdgpu_ring *ring = &kiq->ring;
4240 BUG_ON(!ring->funcs->emit_rreg);
4242 spin_lock_irqsave(&kiq->ring_lock, flags);
4243 if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4244 pr_err("critical bug! too many kiq readers\n");
4247 amdgpu_ring_alloc(ring, 32);
4248 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4249 amdgpu_ring_write(ring, 9 | /* src: register */
4250 (5 << 8) | /* dst: memory */
4251 (1 << 16) | /* count sel */
4252 (1 << 20)); /* write confirm */
4253 amdgpu_ring_write(ring, 0);
4254 amdgpu_ring_write(ring, 0);
4255 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4257 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4259 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4263 amdgpu_ring_commit(ring);
4264 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4266 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4268 /* don't wait anymore for gpu reset case because this way may
4269 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4270 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4271 * never return if we keep waiting in virt_kiq_rreg, which causes
4272 * gpu_recover() to hang there.
4274 * also don't wait any longer when called from IRQ context. */
4276 if (r < 1 && (amdgpu_in_reset(adev)))
4277 goto failed_kiq_read;
4280 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4281 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4282 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4285 if (cnt > MAX_KIQ_REG_TRY)
4286 goto failed_kiq_read;
4289 value = (uint64_t)adev->wb.wb[reg_val_offs] |
4290 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4291 amdgpu_device_wb_free(adev, reg_val_offs);
4295 amdgpu_ring_undo(ring);
4297 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4300 amdgpu_device_wb_free(adev, reg_val_offs);
4301 pr_err("failed to read gpu clock\n");
4305 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4307 uint64_t clock, clock_lo, clock_hi, hi_check;
4309 switch (adev->ip_versions[GC_HWIP][0]) {
4310 case IP_VERSION(9, 3, 0):
4312 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4313 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4314 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4315 /* The SMUIO TSC clock frequency is 100MHz, so the lower 32 bits carry
4316 * over roughly every 42 seconds. */
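/* Reading UPPER, LOWER, then UPPER again catches a carry between the two
 * reads: if the second UPPER read differs, LOWER wrapped in between and is
 * re-read below. 2^32 cycles / 100MHz is roughly 42.9 seconds. */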
4318 if (hi_check != clock_hi) {
4319 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4320 clock_hi = hi_check;
4323 clock = clock_lo | (clock_hi << 32ULL);
4326 amdgpu_gfx_off_ctrl(adev, false);
4327 mutex_lock(&adev->gfx.gpu_clock_mutex);
4328 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4329 clock = gfx_v9_0_kiq_read_clock(adev);
4331 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4332 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4333 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4335 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4336 amdgpu_gfx_off_ctrl(adev, true);
4342 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4344 uint32_t gds_base, uint32_t gds_size,
4345 uint32_t gws_base, uint32_t gws_size,
4346 uint32_t oa_base, uint32_t oa_size)
4348 struct amdgpu_device *adev = ring->adev;
4351 gfx_v9_0_write_data_to_reg(ring, 0, false,
4352 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4356 gfx_v9_0_write_data_to_reg(ring, 0, false,
4357 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4361 gfx_v9_0_write_data_to_reg(ring, 0, false,
4362 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4363 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
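/* OA allocation: (1 << (oa_size + oa_base)) - (1 << oa_base) builds a mask
 * of oa_size consecutive bits starting at bit oa_base */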
4366 gfx_v9_0_write_data_to_reg(ring, 0, false,
4367 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4368 (1 << (oa_size + oa_base)) - (1 << oa_base));
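/* The arrays below are hand-assembled gfx9 compute shader binaries used by the
 * EDC GPR workarounds further down; they write every VGPR/SGPR so that the GPR
 * ECC state is left initialized. */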
4371 static const u32 vgpr_init_compute_shader[] =
4373 0xb07c0000, 0xbe8000ff,
4374 0x000000f8, 0xbf110800,
4375 0x7e000280, 0x7e020280,
4376 0x7e040280, 0x7e060280,
4377 0x7e080280, 0x7e0a0280,
4378 0x7e0c0280, 0x7e0e0280,
4379 0x80808800, 0xbe803200,
4380 0xbf84fff5, 0xbf9c0000,
4381 0xd28c0001, 0x0001007f,
4382 0xd28d0001, 0x0002027e,
4383 0x10020288, 0xb8810904,
4384 0xb7814000, 0xd1196a01,
4385 0x00000301, 0xbe800087,
4386 0xbefc00c1, 0xd89c4000,
4387 0x00020201, 0xd89cc080,
4388 0x00040401, 0x320202ff,
4389 0x00000800, 0x80808100,
4390 0xbf84fff8, 0x7e020280,
4391 0xbf810000, 0x00000000,
4394 static const u32 sgpr_init_compute_shader[] =
4396 0xb07c0000, 0xbe8000ff,
4397 0x0000005f, 0xbee50080,
4398 0xbe812c65, 0xbe822c65,
4399 0xbe832c65, 0xbe842c65,
4400 0xbe852c65, 0xb77c0005,
4401 0x80808500, 0xbf84fff8,
4402 0xbe800080, 0xbf810000,
4405 static const u32 vgpr_init_compute_shader_arcturus[] = {
4406 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4407 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4408 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4409 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4410 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4411 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4412 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4413 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4414 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4415 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4416 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4417 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4418 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4419 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4420 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4421 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4422 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4423 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4424 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4425 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4426 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4427 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4428 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4429 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4430 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4431 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4432 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4433 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4434 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4435 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4436 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4437 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4438 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4439 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4440 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4441 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4442 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4443 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4444 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4445 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4446 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4447 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4448 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4449 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4450 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4451 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4452 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4453 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4454 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4455 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4456 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4457 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4458 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4459 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4460 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4461 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4462 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4463 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4464 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4465 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4466 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4467 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4468 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4469 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4470 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4471 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4472 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4473 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4474 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4475 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4476 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4477 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4478 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4479 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4480 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4481 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4482 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4483 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4484 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4485 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4486 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4487 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4488 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4489 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4490 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4491 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4492 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4493 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4494 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4495 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4496 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4497 0xbf84fff8, 0xbf810000,
4500 /* When the register arrays below are changed, please update gpr_reg_size
4501 * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4502 * to cover all gfx9 ASICs */
4503 static const struct soc15_reg_entry vgpr_init_regs[] = {
4504 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4505 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4506 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4507 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4508 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4509 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4510 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4511 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4512 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4513 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4514 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4515 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4516 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4517 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4520 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4521 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4522 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4523 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4524 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4525 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4526 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4527 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4528 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4529 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4530 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4531 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4532 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4533 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4534 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4537 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4538 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4539 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4540 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4541 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4542 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4543 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4544 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4545 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4546 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4547 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4548 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4549 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4550 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4551 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4554 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4555 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4556 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4557 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4558 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4559 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4560 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4561 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4562 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4563 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4564 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4565 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4566 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4567 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4568 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
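/* EDC counter registers; the two trailing numbers in each entry give the
 * number of SE instances and the number of per-SE instances to read. */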
4571 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4572 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4573 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4574 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4575 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4576 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4577 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4578 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4579 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4580 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4581 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4582 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4583 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4584 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4585 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4586 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4587 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4588 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4589 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4590 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4591 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4592 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4593 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4594 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4595 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4596 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4597 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4598 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4599 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4600 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4601 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4602 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4603 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4604 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4607 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4609 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4612 /* only support when RAS is enabled */
4613 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4616 r = amdgpu_ring_alloc(ring, 7);
4618 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4623 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4624 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4626 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4627 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4628 PACKET3_DMA_DATA_DST_SEL(1) |
4629 PACKET3_DMA_DATA_SRC_SEL(2) |
4630 PACKET3_DMA_DATA_ENGINE(0)));
4631 amdgpu_ring_write(ring, 0);
4632 amdgpu_ring_write(ring, 0);
4633 amdgpu_ring_write(ring, 0);
4634 amdgpu_ring_write(ring, 0);
4635 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4636 adev->gds.gds_size);
4638 amdgpu_ring_commit(ring);
4640 for (i = 0; i < adev->usec_timeout; i++) {
4641 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4646 if (i >= adev->usec_timeout)
4649 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4654 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4656 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4657 struct amdgpu_ib ib;
4658 struct dma_fence *f = NULL;
4660 unsigned total_size, vgpr_offset, sgpr_offset;
4663 int compute_dim_x = adev->gfx.config.max_shader_engines *
4664 adev->gfx.config.max_cu_per_sh *
4665 adev->gfx.config.max_sh_per_se;
4666 int sgpr_work_group_size = 5;
4667 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
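/* each GPR init-regs table has 6 common entries (RESOURCE_LIMITS,
 * NUM_THREAD_X/Y/Z, PGM_RSRC1/2) plus one COMPUTE_STATIC_THREAD_MGMT_SE
 * register per shader engine, hence max_shader_engines + 6 */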
4668 int vgpr_init_shader_size;
4669 const u32 *vgpr_init_shader_ptr;
4670 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4672 /* only support when RAS is enabled */
4673 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4676 /* bail if the compute ring is not ready */
4677 if (!ring->sched.ready)
4680 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4681 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4682 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4683 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4685 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4686 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4687 vgpr_init_regs_ptr = vgpr_init_regs;
4691 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4693 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4695 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4696 total_size = ALIGN(total_size, 256);
4697 vgpr_offset = total_size;
4698 total_size += ALIGN(vgpr_init_shader_size, 256);
4699 sgpr_offset = total_size;
4700 total_size += sizeof(sgpr_init_compute_shader);
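/* Sizing above: each of the three dispatch streams needs 3 dwords per
 * SET_SH_REG of the gpr_reg_size init registers, 4 dwords for the
 * COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT and 2 for the CS partial
 * flush; the two shader binaries are then appended at 256-byte aligned
 * offsets. */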
4702 /* allocate an indirect buffer to put the commands in */
4703 memset(&ib, 0, sizeof(ib));
4704 r = amdgpu_ib_get(adev, NULL, total_size,
4705 AMDGPU_IB_POOL_DIRECT, &ib);
4707 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4711 /* load the compute shaders */
4712 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4713 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4715 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4716 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4718 /* init the ib length to 0 */
4722 /* write the register state for the compute dispatch */
4723 for (i = 0; i < gpr_reg_size; i++) {
4724 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4725 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4726 - PACKET3_SET_SH_REG_START;
4727 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4729 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4730 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4731 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4732 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4733 - PACKET3_SET_SH_REG_START;
4734 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4735 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4737 /* write dispatch packet */
4738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4739 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4740 ib.ptr[ib.length_dw++] = 1; /* y */
4741 ib.ptr[ib.length_dw++] = 1; /* z */
4742 ib.ptr[ib.length_dw++] =
4743 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4745 /* write CS partial flush packet */
4746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4747 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4750 /* write the register state for the compute dispatch */
4751 for (i = 0; i < gpr_reg_size; i++) {
4752 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4753 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4754 - PACKET3_SET_SH_REG_START;
4755 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4757 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4758 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4760 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4761 - PACKET3_SET_SH_REG_START;
4762 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4763 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4765 /* write dispatch packet */
4766 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4767 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4768 ib.ptr[ib.length_dw++] = 1; /* y */
4769 ib.ptr[ib.length_dw++] = 1; /* z */
4770 ib.ptr[ib.length_dw++] =
4771 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4773 /* write CS partial flush packet */
4774 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4775 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4778 /* write the register state for the compute dispatch */
4779 for (i = 0; i < gpr_reg_size; i++) {
4780 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4781 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4782 - PACKET3_SET_SH_REG_START;
4783 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4785 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4786 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4787 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4788 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4789 - PACKET3_SET_SH_REG_START;
4790 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4791 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4793 /* write dispatch packet */
4794 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4795 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4796 ib.ptr[ib.length_dw++] = 1; /* y */
4797 ib.ptr[ib.length_dw++] = 1; /* z */
4798 ib.ptr[ib.length_dw++] =
4799 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4801 /* write CS partial flush packet */
4802 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4803 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4805 /* schedule the ib on the ring */
4806 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4808 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4812 /* wait for the GPU to finish processing the IB */
4813 r = dma_fence_wait(f, false);
4815 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4820 amdgpu_ib_free(adev, &ib, NULL);
4826 static int gfx_v9_0_early_init(void *handle)
4828 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4830 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4831 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4832 adev->gfx.num_gfx_rings = 0;
4834 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4835 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4836 AMDGPU_MAX_COMPUTE_RINGS);
4837 gfx_v9_0_set_kiq_pm4_funcs(adev);
4838 gfx_v9_0_set_ring_funcs(adev);
4839 gfx_v9_0_set_irq_funcs(adev);
4840 gfx_v9_0_set_gds_init(adev);
4841 gfx_v9_0_set_rlc_funcs(adev);
4846 static int gfx_v9_0_ecc_late_init(void *handle)
4848 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4852 * Temporary workaround for an issue where CP firmware fails to
4853 * update the read pointer when CPDMA writes the clearing operation
4854 * to GDS in the suspend/resume sequence on several cards, so
4855 * limit this operation to the cold boot sequence.
4857 if ((!adev->in_suspend) &&
4858 (adev->gds.gds_size)) {
4859 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4864 /* requires IBs so do in late init after IB pool is initialized */
4865 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4866 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4868 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4873 if (adev->gfx.ras_funcs &&
4874 adev->gfx.ras_funcs->ras_late_init) {
4875 r = adev->gfx.ras_funcs->ras_late_init(adev);
4880 if (adev->gfx.ras_funcs &&
4881 adev->gfx.ras_funcs->enable_watchdog_timer)
4882 adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4887 static int gfx_v9_0_late_init(void *handle)
4889 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4892 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4896 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4900 r = gfx_v9_0_ecc_late_init(handle);
4907 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4909 uint32_t rlc_setting;
4911 /* if RLC is not enabled, do nothing */
4912 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4913 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
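/*
 * Request RLC safe mode: write the CMD bit plus a MESSAGE of 1 to
 * RLC_SAFE_MODE and poll until the RLC clears the CMD field to
 * acknowledge that it has entered safe mode.
 */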
4919 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4924 data = RLC_SAFE_MODE__CMD_MASK;
4925 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4926 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4928 /* wait for RLC_SAFE_MODE */
4929 for (i = 0; i < adev->usec_timeout; i++) {
4930 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4936 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4940 data = RLC_SAFE_MODE__CMD_MASK;
4941 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4944 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4947 amdgpu_gfx_rlc_enter_safe_mode(adev);
4949 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4950 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4951 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4952 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4954 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4955 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4956 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4959 amdgpu_gfx_rlc_exit_safe_mode(adev);
4962 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4965 /* TODO: double check if we need to perform under safe mode */
4966 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4968 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4969 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4971 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4973 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4974 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4976 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4978 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4981 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4986 amdgpu_gfx_rlc_enter_safe_mode(adev);
4988 /* It is disabled by HW by default */
4989 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4990 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4991 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4993 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4994 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4996 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4997 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4998 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5000 /* only for Vega10 & Raven1 */
5001 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
5004 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5006 /* MGLS is a global flag to control all MGLS in GFX */
5007 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5008 /* 2 - RLC memory Light sleep */
5009 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5010 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5011 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5013 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5015 /* 3 - CP memory Light sleep */
5016 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5017 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5018 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5020 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5024 /* 1 - MGCG_OVERRIDE */
5025 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5027 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
5028 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
5030 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5031 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5032 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
5033 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5036 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5038 /* 2 - disable MGLS in RLC */
5039 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5040 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5041 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5042 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5045 /* 3 - disable MGLS in CP */
5046 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5047 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5048 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5049 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5053 amdgpu_gfx_rlc_exit_safe_mode(adev);
5056 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5061 if (!adev->gfx.num_gfx_rings)
5064 amdgpu_gfx_rlc_enter_safe_mode(adev);
5066 /* Enable 3D CGCG/CGLS */
5068 /* write cmd to clear the cgcg/cgls override */
5069 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5070 /* unset CGCG override */
5071 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5072 /* update CGCG and CGLS override bits */
5074 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5076 /* enable 3Dcgcg FSM(0x0000363f) */
5077 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5079 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5080 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5081 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5083 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5085 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5086 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5087 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5089 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5091 /* set IDLE_POLL_COUNT(0x00900100) */
5092 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5093 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5094 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5096 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5098 /* Disable CGCG/CGLS */
5099 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5100 /* disable cgcg, cgls should be disabled */
5101 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5102 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5103 /* disable cgcg and cgls in FSM */
5105 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5108 amdgpu_gfx_rlc_exit_safe_mode(adev);
5111 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5116 amdgpu_gfx_rlc_enter_safe_mode(adev);
5118 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5119 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5120 /* unset CGCG override */
5121 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5122 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5123 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5125 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5126 /* update CGCG and CGLS override bits */
5128 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5130 /* enable cgcg FSM(0x0000363F) */
5131 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5133 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5134 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5135 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5137 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5138 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5139 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5140 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5141 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5143 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5145 /* set IDLE_POLL_COUNT(0x00900100) */
5146 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5147 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5148 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5150 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5152 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5153 /* reset CGCG/CGLS bits */
5154 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5155 /* disable cgcg and cgls in FSM */
5157 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5160 amdgpu_gfx_rlc_exit_safe_mode(adev);
5163 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5167 /* CGCG/CGLS should be enabled after MGCG/MGLS
5168 * === MGCG + MGLS ===
5170 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5171 /* === CGCG/CGLS for GFX 3D Only === */
5172 gfx_v9_0_update_3d_clock_gating(adev, enable);
5173 /* === CGCG + CGLS === */
5174 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5176 /* CGCG/CGLS should be disabled before MGCG/MGLS
5177 * === CGCG + CGLS ===
5179 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5180 /* === CGCG/CGLS for GFX 3D Only === */
5181 gfx_v9_0_update_3d_clock_gating(adev, enable);
5182 /* === MGCG + MGLS === */
5183 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
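/*
 * Program the VMID that RLC SPM (streaming performance monitor) uses
 * for its memory traffic.  GFXOFF is temporarily disabled around the
 * register access; under one-VF SR-IOV the NO_KIQ accessors are used.
 */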
5188 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5192 amdgpu_gfx_off_ctrl(adev, false);
5194 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5195 if (amdgpu_sriov_is_pp_one_vf(adev))
5196 data = RREG32_NO_KIQ(reg);
5198 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5200 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5201 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5203 if (amdgpu_sriov_is_pp_one_vf(adev))
5204 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5206 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5208 amdgpu_gfx_off_ctrl(adev, true);
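/*
 * Return true if @offset matches one of the registers in @entries,
 * i.e. it falls in the RLCG access range used by the SR-IOV register
 * access helpers.
 */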
5211 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5213 struct soc15_reg_rlcg *entries, int arr_size)
5221 for (i = 0; i < arr_size; i++) {
5222 const struct soc15_reg_rlcg *entry;
5224 entry = &entries[i];
5225 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5233 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5235 return gfx_v9_0_check_rlcg_range(adev, offset,
5236 (void *)rlcg_access_gc_9_0,
5237 ARRAY_SIZE(rlcg_access_gc_9_0));
5240 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5241 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5242 .set_safe_mode = gfx_v9_0_set_safe_mode,
5243 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5244 .init = gfx_v9_0_rlc_init,
5245 .get_csb_size = gfx_v9_0_get_csb_size,
5246 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5247 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5248 .resume = gfx_v9_0_rlc_resume,
5249 .stop = gfx_v9_0_rlc_stop,
5250 .reset = gfx_v9_0_rlc_reset,
5251 .start = gfx_v9_0_rlc_start,
5252 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5253 .sriov_wreg = gfx_v9_0_sriov_wreg,
5254 .sriov_rreg = gfx_v9_0_sriov_rreg,
5255 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5258 static int gfx_v9_0_set_powergating_state(void *handle,
5259 enum amd_powergating_state state)
5261 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5262 bool enable = (state == AMD_PG_STATE_GATE);
5264 switch (adev->ip_versions[GC_HWIP][0]) {
5265 case IP_VERSION(9, 2, 2):
5266 case IP_VERSION(9, 1, 0):
5267 case IP_VERSION(9, 3, 0):
5269 amdgpu_gfx_off_ctrl(adev, false);
5271 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5272 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5273 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5275 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5276 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5279 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5280 gfx_v9_0_enable_cp_power_gating(adev, true);
5282 gfx_v9_0_enable_cp_power_gating(adev, false);
5284 /* update gfx cgpg state */
5285 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5287 /* update mgcg state */
5288 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5291 amdgpu_gfx_off_ctrl(adev, true);
5293 case IP_VERSION(9, 2, 1):
5294 amdgpu_gfx_off_ctrl(adev, enable);
5303 static int gfx_v9_0_set_clockgating_state(void *handle,
5304 enum amd_clockgating_state state)
5306 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5308 if (amdgpu_sriov_vf(adev))
5311 switch (adev->ip_versions[GC_HWIP][0]) {
5312 case IP_VERSION(9, 0, 1):
5313 case IP_VERSION(9, 2, 1):
5314 case IP_VERSION(9, 4, 0):
5315 case IP_VERSION(9, 2, 2):
5316 case IP_VERSION(9, 1, 0):
5317 case IP_VERSION(9, 4, 1):
5318 case IP_VERSION(9, 3, 0):
5319 case IP_VERSION(9, 4, 2):
5320 gfx_v9_0_update_gfx_clock_gating(adev,
5321 state == AMD_CG_STATE_GATE);
5329 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5331 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5334 if (amdgpu_sriov_vf(adev))
5337 /* AMD_CG_SUPPORT_GFX_MGCG */
5338 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5339 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5340 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5342 /* AMD_CG_SUPPORT_GFX_CGCG */
5343 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5344 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5345 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5347 /* AMD_CG_SUPPORT_GFX_CGLS */
5348 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5349 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5351 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5352 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5353 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5354 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5356 /* AMD_CG_SUPPORT_GFX_CP_LS */
5357 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5358 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5359 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5361 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5362 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5363 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5364 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5365 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5367 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5368 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5369 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5373 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5375 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5378 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5380 struct amdgpu_device *adev = ring->adev;
5383 /* XXX check if swapping is necessary on BE */
5384 if (ring->use_doorbell) {
5385 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5387 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5388 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5394 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5396 struct amdgpu_device *adev = ring->adev;
5398 if (ring->use_doorbell) {
5399 /* XXX check if swapping is necessary on BE */
5400 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5401 WDOORBELL64(ring->doorbell_index, ring->wptr);
5403 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5404 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
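/*
 * HDP flush: select the NBIO ref_and_mask bit for this ring (per-pipe
 * bits for compute, CP0 for gfx) and emit a WAIT_REG_MEM against the
 * HDP flush request/done registers with that ref/mask.
 */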
5408 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5410 struct amdgpu_device *adev = ring->adev;
5411 u32 ref_and_mask, reg_mem_engine;
5412 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5414 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5417 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5420 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5427 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5428 reg_mem_engine = 1; /* pfp */
5431 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5432 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5433 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5434 ref_and_mask, ref_and_mask, 0x20);
5437 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5438 struct amdgpu_job *job,
5439 struct amdgpu_ib *ib,
5442 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5443 u32 header, control = 0;
5445 if (ib->flags & AMDGPU_IB_FLAG_CE)
5446 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5448 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5450 control |= ib->length_dw | (vmid << 24);
5452 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5453 control |= INDIRECT_BUFFER_PRE_ENB(1);
5455 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5456 gfx_v9_0_ring_emit_de_meta(ring);
5459 amdgpu_ring_write(ring, header);
5460 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5461 amdgpu_ring_write(ring,
5465 lower_32_bits(ib->gpu_addr));
5466 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5467 amdgpu_ring_write(ring, control);
5470 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5471 struct amdgpu_job *job,
5472 struct amdgpu_ib *ib,
5475 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5476 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5478	/* Currently there is a high chance of a wave ID mismatch between
5479	 * ME and GDS, leading to a HW deadlock, because ME generates
5480	 * different wave IDs than GDS expects. This happens randomly when
5481	 * at least 5 compute pipes use GDS ordered append.
5482	 * The wave IDs generated by ME are also wrong after suspend/resume.
5483	 * Those are probably bugs somewhere else in the kernel driver.
5485 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5486 * GDS to 0 for this ring (me/pipe).
5488 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5489 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5490 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5491 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5494 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5495 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5496 amdgpu_ring_write(ring,
5500 lower_32_bits(ib->gpu_addr));
5501 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5502 amdgpu_ring_write(ring, control);
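/*
 * Emit an EOP fence using RELEASE_MEM: flush/invalidate the TC caches
 * (write-back only if AMDGPU_FENCE_FLAG_TC_WB_ONLY is set), write the
 * 32- or 64-bit sequence value to @addr and optionally raise an
 * interrupt.
 */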
5505 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5506 u64 seq, unsigned flags)
5508 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5509 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5510 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5512 /* RELEASE_MEM - flush caches, send int */
5513 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5514 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5515 EOP_TC_NC_ACTION_EN) :
5516 (EOP_TCL1_ACTION_EN |
5518 EOP_TC_WB_ACTION_EN |
5519 EOP_TC_MD_ACTION_EN)) |
5520 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5522 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5525	 * the address must be Qword aligned for a 64-bit write, and Dword
5526	 * aligned if only the low 32 bits of data are sent (data high is discarded)
5532 amdgpu_ring_write(ring, lower_32_bits(addr));
5533 amdgpu_ring_write(ring, upper_32_bits(addr));
5534 amdgpu_ring_write(ring, lower_32_bits(seq));
5535 amdgpu_ring_write(ring, upper_32_bits(seq));
5536 amdgpu_ring_write(ring, 0);
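/*
 * Pipeline sync: wait (on the PFP for gfx rings, on the ME for compute)
 * until the ring's fence memory reaches the current sync_seq, so that
 * earlier work has completed before later packets execute.
 */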
5539 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5541 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5542 uint32_t seq = ring->fence_drv.sync_seq;
5543 uint64_t addr = ring->fence_drv.gpu_addr;
5545 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5546 lower_32_bits(addr), upper_32_bits(addr),
5547 seq, 0xffffffff, 4);
5550 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5551 unsigned vmid, uint64_t pd_addr)
5553 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5555 /* compute doesn't have PFP */
5556 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5557 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5558 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5559 amdgpu_ring_write(ring, 0x0);
5563 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5565 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5568 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5572 /* XXX check if swapping is necessary on BE */
5573 if (ring->use_doorbell)
5574 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5580 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5582 struct amdgpu_device *adev = ring->adev;
5584 /* XXX check if swapping is necessary on BE */
5585 if (ring->use_doorbell) {
5586 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5587 WDOORBELL64(ring->doorbell_index, ring->wptr);
5589 BUG(); /* only DOORBELL method supported on gfx9 now */
5593 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5594 u64 seq, unsigned int flags)
5596 struct amdgpu_device *adev = ring->adev;
5598 /* only 32 bits are allocated for each fence seq writeback slot */
5599 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5601 /* write fence seq to the "addr" */
5602 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5603 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5604 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5605 amdgpu_ring_write(ring, lower_32_bits(addr));
5606 amdgpu_ring_write(ring, upper_32_bits(addr));
5607 amdgpu_ring_write(ring, lower_32_bits(seq));
5609 if (flags & AMDGPU_FENCE_FLAG_INT) {
5610 /* set register to trigger INT */
5611 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5612 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5613 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5614 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5615 amdgpu_ring_write(ring, 0);
5616 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5620 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5622 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5623 amdgpu_ring_write(ring, 0);
5626 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5628 struct v9_ce_ib_state ce_payload = {0};
5632 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5633 csa_addr = amdgpu_csa_vaddr(ring->adev);
5635 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5636 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5637 WRITE_DATA_DST_SEL(8) |
5639 WRITE_DATA_CACHE_POLICY(0));
5640 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5641 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5642 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5645 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5647 struct v9_de_ib_state de_payload = {0};
5648 uint64_t csa_addr, gds_addr;
5651 csa_addr = amdgpu_csa_vaddr(ring->adev);
5652 gds_addr = csa_addr + 4096;
5653 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5654 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5656 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5657 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5658 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5659 WRITE_DATA_DST_SEL(8) |
5661 WRITE_DATA_CACHE_POLICY(0));
5662 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5663 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5664 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5667 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5670 uint32_t v = secure ? FRAME_TMZ : 0;
5672 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5673 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5676 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5680 if (amdgpu_sriov_vf(ring->adev))
5681 gfx_v9_0_ring_emit_ce_meta(ring);
5683 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5684 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5685 /* set load_global_config & load_global_uconfig */
5687 /* set load_cs_sh_regs */
5689 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5692 /* set load_ce_ram if preamble presented */
5693 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5696	/* still set load_ce_ram if this is the first time the preamble is
5697	 * presented, even though no context switch happens.
5699 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5703 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5704 amdgpu_ring_write(ring, dw2);
5705 amdgpu_ring_write(ring, 0);
5708 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5711 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5712 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5713 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5714 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5715 ret = ring->wptr & ring->buf_mask;
5716 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5720 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5723 BUG_ON(offset > ring->buf_mask);
5724 BUG_ON(ring->ring[offset] != 0x55aa55aa);
5726 cur = (ring->wptr & ring->buf_mask) - 1;
5727 if (likely(cur > offset))
5728 ring->ring[offset] = cur - offset;
5730 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
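/*
 * Read a register from the ring: COPY_DATA from the register (with
 * write confirm) into the device writeback buffer at @reg_val_offs,
 * where the caller picks the value up.
 */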
5733 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5734 uint32_t reg_val_offs)
5736 struct amdgpu_device *adev = ring->adev;
5738 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5739 amdgpu_ring_write(ring, 0 | /* src: register*/
5740 (5 << 8) | /* dst: memory */
5741 (1 << 20)); /* write confirm */
5742 amdgpu_ring_write(ring, reg);
5743 amdgpu_ring_write(ring, 0);
5744 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5746 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5750 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5755 switch (ring->funcs->type) {
5756 case AMDGPU_RING_TYPE_GFX:
5757 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5759 case AMDGPU_RING_TYPE_KIQ:
5760 cmd = (1 << 16); /* no inc addr */
5766 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5767 amdgpu_ring_write(ring, cmd);
5768 amdgpu_ring_write(ring, reg);
5769 amdgpu_ring_write(ring, 0);
5770 amdgpu_ring_write(ring, val);
5773 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5774 uint32_t val, uint32_t mask)
5776 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5779 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5780 uint32_t reg0, uint32_t reg1,
5781 uint32_t ref, uint32_t mask)
5783 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5784 struct amdgpu_device *adev = ring->adev;
5785 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5786 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5789 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5792 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5796 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5798 struct amdgpu_device *adev = ring->adev;
5801 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5802 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5803 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5804 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5805 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5808 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5809 enum amdgpu_interrupt_state state)
5812 case AMDGPU_IRQ_STATE_DISABLE:
5813 case AMDGPU_IRQ_STATE_ENABLE:
5814 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5815 TIME_STAMP_INT_ENABLE,
5816 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5823 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5825 enum amdgpu_interrupt_state state)
5827 u32 mec_int_cntl, mec_int_cntl_reg;
5830 * amdgpu controls only the first MEC. That's why this function only
5831 * handles the setting of interrupts for this specific MEC. All other
5832 * pipes' interrupts are set by amdkfd.
5838 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5841 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5844 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5847 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5850 DRM_DEBUG("invalid pipe %d\n", pipe);
5854 DRM_DEBUG("invalid me %d\n", me);
5859 case AMDGPU_IRQ_STATE_DISABLE:
5860 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5861 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5862 TIME_STAMP_INT_ENABLE, 0);
5863 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5865 case AMDGPU_IRQ_STATE_ENABLE:
5866 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5867 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5868 TIME_STAMP_INT_ENABLE, 1);
5869 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5876 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5877 struct amdgpu_irq_src *source,
5879 enum amdgpu_interrupt_state state)
5882 case AMDGPU_IRQ_STATE_DISABLE:
5883 case AMDGPU_IRQ_STATE_ENABLE:
5884 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5885 PRIV_REG_INT_ENABLE,
5886 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5895 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5896 struct amdgpu_irq_src *source,
5898 enum amdgpu_interrupt_state state)
5901 case AMDGPU_IRQ_STATE_DISABLE:
5902 case AMDGPU_IRQ_STATE_ENABLE:
5903 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5904 PRIV_INSTR_INT_ENABLE,
5905 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5914 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
5915 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5916 CP_ECC_ERROR_INT_ENABLE, 1)
5918 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
5919 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5920 CP_ECC_ERROR_INT_ENABLE, 0)
5922 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5923 struct amdgpu_irq_src *source,
5925 enum amdgpu_interrupt_state state)
5928 case AMDGPU_IRQ_STATE_DISABLE:
5929 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5930 CP_ECC_ERROR_INT_ENABLE, 0);
5931 DISABLE_ECC_ON_ME_PIPE(1, 0);
5932 DISABLE_ECC_ON_ME_PIPE(1, 1);
5933 DISABLE_ECC_ON_ME_PIPE(1, 2);
5934 DISABLE_ECC_ON_ME_PIPE(1, 3);
5937 case AMDGPU_IRQ_STATE_ENABLE:
5938 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5939 CP_ECC_ERROR_INT_ENABLE, 1);
5940 ENABLE_ECC_ON_ME_PIPE(1, 0);
5941 ENABLE_ECC_ON_ME_PIPE(1, 1);
5942 ENABLE_ECC_ON_ME_PIPE(1, 2);
5943 ENABLE_ECC_ON_ME_PIPE(1, 3);
5953 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5954 struct amdgpu_irq_src *src,
5956 enum amdgpu_interrupt_state state)
5959 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5960 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5962 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5963 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5965 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5966 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5968 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5969 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5971 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5972 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5974 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5975 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5977 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5978 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5980 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5981 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5983 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5984 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5992 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5993 struct amdgpu_irq_src *source,
5994 struct amdgpu_iv_entry *entry)
5997 u8 me_id, pipe_id, queue_id;
5998 struct amdgpu_ring *ring;
6000 DRM_DEBUG("IH: CP EOP\n");
6001 me_id = (entry->ring_id & 0x0c) >> 2;
6002 pipe_id = (entry->ring_id & 0x03) >> 0;
6003 queue_id = (entry->ring_id & 0x70) >> 4;
6007 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6011 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6012 ring = &adev->gfx.compute_ring[i];
6013 /* Per-queue interrupt is supported for MEC starting from VI.
6014 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6016 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6017 amdgpu_fence_process(ring);
6024 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6025 struct amdgpu_iv_entry *entry)
6027 u8 me_id, pipe_id, queue_id;
6028 struct amdgpu_ring *ring;
6031 me_id = (entry->ring_id & 0x0c) >> 2;
6032 pipe_id = (entry->ring_id & 0x03) >> 0;
6033 queue_id = (entry->ring_id & 0x70) >> 4;
6037 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6041 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6042 ring = &adev->gfx.compute_ring[i];
6043 if (ring->me == me_id && ring->pipe == pipe_id &&
6044 ring->queue == queue_id)
6045 drm_sched_fault(&ring->sched);
6051 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6052 struct amdgpu_irq_src *source,
6053 struct amdgpu_iv_entry *entry)
6055 DRM_ERROR("Illegal register access in command stream\n");
6056 gfx_v9_0_fault(adev, entry);
6060 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6061 struct amdgpu_irq_src *source,
6062 struct amdgpu_iv_entry *entry)
6064 DRM_ERROR("Illegal instruction in command stream\n");
6065 gfx_v9_0_fault(adev, entry);
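/*
 * EDC error counter description table: each entry names a sub-block,
 * the counter register it lives in, and the SEC and DED bit-fields to
 * extract.  Entries with only a SEC/SED field report correctable
 * errors only.
 */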
6070 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6071 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6072 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6073 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6075 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6076 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6077 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6079 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6080 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6083 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6084 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6087 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6088 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6089 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6091 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6092 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6095 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6096 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6097 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6099 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6100 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6101 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6103 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6104 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6107 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6108 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6111 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6112 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6115 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6116 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6117 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6119 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6120 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6123 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6124 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6125 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6127 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6128 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6129 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6130 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6132 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6133 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6134 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6137 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6138 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6139 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6140 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6142 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6143 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6144 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6145 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6147 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6148 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6149 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6150 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6152 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6153 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6154 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6155 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6157 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6158 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6161 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6162 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6163 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6165 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6166 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6169 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6170 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6173 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6174 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6177 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6178 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6181 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6182 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6185 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6186 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6189 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6190 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6191 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6193 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6194 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6195 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6197 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6198 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6199 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6201 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6202 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6203 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6205 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6206 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6207 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6209 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6210 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6213 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6214 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6217 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6218 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6221 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6222 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6225 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6226 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6229 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6230 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6233 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6234 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6237 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6238 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6241 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6242 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6245 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6246 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6249 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6250 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6253 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6254 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6257 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6258 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6261 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6262 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6265 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6266 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6267 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6269 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6270 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6271 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6273 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6274 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6277 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6278 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6281 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6282 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6285 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6286 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6287 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6289 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6290 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6291 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6293 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6294 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6295 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6297 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6298 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6299 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6301 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6302 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6305 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6306 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6307 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6309 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6310 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6311 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6313 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6314 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6315 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6317 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6318 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6319 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6321 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6322 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6323 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6325 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6326 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6327 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6329 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6330 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6331 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6333 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6334 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6335 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6337 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6338 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6339 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6341 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6342 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6343 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6345 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6346 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6347 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6349 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6350 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6351 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6353 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6354 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6355 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6357 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6358 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6359 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6361 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6362 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6363 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6365 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6366 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6367 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6369 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6370 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6371 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6373 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6374 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6377 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6378 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6381 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6382 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6385 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6386 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6389 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6390 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6393 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6394 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6395 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6397 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6398 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6399 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6401 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6402 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6403 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6405 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6406 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6407 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6409 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6410 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6411 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6413 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6414 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6417 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6418 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6421 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6422 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6425 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6426 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6429 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6430 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6433 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6434 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6435 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6437 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6438 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6439 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6441 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6442 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6443 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6445 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6446 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6447 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6449 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6450 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6451 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6453 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6454 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6457 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6458 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6461 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6462 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6465 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6466 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6469 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6470 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6473 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6474 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6475 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6477 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6478 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6479 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6481 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6482 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6483 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6485 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6486 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6489 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6490 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6493 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6494 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6497 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6498 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6501 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6502 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6505 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6506 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
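/*
 * Inject a GFX RAS error through the PSP RAS TA: validate the requested
 * sub-block and error type against the ras_gfx_subblocks table, then
 * pass the request to psp_ras_trigger_error() under the GRBM index
 * mutex.
 */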
6511 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6514 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6516 struct ta_ras_trigger_error_input block_info = { 0 };
6518 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6521 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6524 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6527 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6529 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6530 ras_gfx_subblocks[info->head.sub_block_index].name,
6535 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6537 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6538 ras_gfx_subblocks[info->head.sub_block_index].name,
6543 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6544 block_info.sub_block_index =
6545 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6546 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6547 block_info.address = info->address;
6548 block_info.value = info->value;
6550 mutex_lock(&adev->grbm_idx_mutex);
6551 ret = psp_ras_trigger_error(&adev->psp, &block_info);
6552 mutex_unlock(&adev->grbm_idx_mutex);
6557 static const char *vml2_mems[] = {
6558 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6559 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6560 "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6561 "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6562 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6563 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6564 "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6565 "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6566 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6567 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6568 "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6569 "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6570 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6571 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6572 "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6573 "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6576 static const char *vml2_walker_mems[] = {
6577 "UTC_VML2_CACHE_PDE0_MEM0",
6578 "UTC_VML2_CACHE_PDE0_MEM1",
6579 "UTC_VML2_CACHE_PDE1_MEM0",
6580 "UTC_VML2_CACHE_PDE1_MEM1",
6581 "UTC_VML2_CACHE_PDE2_MEM0",
6582 "UTC_VML2_CACHE_PDE2_MEM1",
6583 "UTC_VML2_RDIF_LOG_FIFO",
6586 static const char *atc_l2_cache_2m_mems[] = {
6587 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6588 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6589 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6590 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6593 static const char *atc_l2_cache_4k_mems[] = {
6594 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6595 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6596 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6597 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6598 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6599 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6600 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6601 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6602 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6603 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6604 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6605 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6606 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6607 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6608 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6609 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6610 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6611 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6612 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6613 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6614 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6615 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6616 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6617 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6618 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6619 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6620 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6621 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6622 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6623 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6624 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6625 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6628 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6629 struct ras_err_data *err_data)
6632 uint32_t sec_count, ded_count;
6634 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6635 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6636 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6637 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6638 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6639 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6640 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6641 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6643 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6644 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6645 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6647 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6649 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6650 "SEC %d\n", i, vml2_mems[i], sec_count);
6651 err_data->ce_count += sec_count;
6654 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6656 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6657 "DED %d\n", i, vml2_mems[i], ded_count);
6658 err_data->ue_count += ded_count;
6662 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6663 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6664 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6666 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6669 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6670 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6671 err_data->ce_count += sec_count;
6674 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6677 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6678 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6679 err_data->ue_count += ded_count;
6683 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6684 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6685 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6687 sec_count = (data & 0x00006000L) >> 0xd;
6689 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6690 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6692 err_data->ce_count += sec_count;
6696 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6697 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6698 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6700 sec_count = (data & 0x00006000L) >> 0xd;
6702 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6703 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6705 err_data->ce_count += sec_count;
6708 ded_count = (data & 0x00018000L) >> 0xf;
6710 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6711 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6713 err_data->ue_count += ded_count;
6717 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6718 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6719 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6720 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6725 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6726 const struct soc15_reg_entry *reg,
6727 uint32_t se_id, uint32_t inst_id, uint32_t value,
6728 uint32_t *sec_count, uint32_t *ded_count)
6731 uint32_t sec_cnt, ded_cnt;
6733 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
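/* skip table entries that do not describe the register being queried */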
6734 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6735 gfx_v9_0_ras_fields[i].seg != reg->seg ||
6736 gfx_v9_0_ras_fields[i].inst != reg->inst)
6740 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6741 gfx_v9_0_ras_fields[i].sec_count_shift;
6743 dev_info(adev->dev, "GFX SubBlock %s, "
6744 "Instance[%d][%d], SEC %d\n",
6745 gfx_v9_0_ras_fields[i].name,
6748 *sec_count += sec_cnt;
6752 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6753 gfx_v9_0_ras_fields[i].ded_count_shift;
6755 dev_info(adev->dev, "GFX SubBlock %s, "
6756 "Instance[%d][%d], DED %d\n",
6757 gfx_v9_0_ras_fields[i].name,
6760 *ded_count += ded_cnt;
6767 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6771 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6774 /* read back registers to clear the counters */
6775 mutex_lock(&adev->grbm_idx_mutex);
6776 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6777 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6778 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6779 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6780 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6784 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6785 mutex_unlock(&adev->grbm_idx_mutex);
6787 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6788 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6789 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6790 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6791 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6792 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6793 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6794 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
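/* select each VML2/ATCL2 counter instance and read it back as part of the reset */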
6796 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6797 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6798 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6801 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6802 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6803 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6806 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6807 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6808 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6811 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6812 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6813 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6816 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6817 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6818 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6819 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6822 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6823 void *ras_error_status)
6825 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6826 uint32_t sec_count = 0, ded_count = 0;
6830 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6833 err_data->ue_count = 0;
6834 err_data->ce_count = 0;
6836 mutex_lock(&adev->grbm_idx_mutex);
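/* walk every EDC counter register on each SE/instance and accumulate the per sub-block counts */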
6838 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6839 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6840 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6841 gfx_v9_0_select_se_sh(adev, j, 0, k);
6843 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6845 gfx_v9_0_ras_error_count(adev,
6846 &gfx_v9_0_edc_counter_regs[i],
6848 &sec_count, &ded_count);
6853 err_data->ce_count += sec_count;
6854 err_data->ue_count += ded_count;
6856 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6857 mutex_unlock(&adev->grbm_idx_mutex);
6859 gfx_v9_0_query_utc_edc_status(adev, err_data);
6864 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6866 const unsigned int cp_coher_cntl =
6867 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6868 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6869 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6870 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6871 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6873 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
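/* 7 dwords in total (header + 6 payload), matching the emit_frame_size accounting */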
6874 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6875 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6876 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6877 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
6878 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6879 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
6880 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6883 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6884 uint32_t pipe, bool enable)
6886 struct amdgpu_device *adev = ring->adev;
6888 uint32_t wcl_cs_reg;
6890 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6891 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6895 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6898 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6901 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6904 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6907 DRM_DEBUG("invalid pipe %d\n", pipe);
6911 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6914 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6916 struct amdgpu_device *adev = ring->adev;
6921 /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
6922 * the number of gfx waves. Setting the low 5 bits (0x1f) makes sure gfx only gets
6923 * around 25% of the gpu resources.
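 * For illustration: 0x1f out of the full 7-bit range (0x7f) is roughly 25%,
 * which is where the figure above comes from.
 */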
6925 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6926 amdgpu_ring_emit_wreg(ring,
6927 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6930 /* Restrict waves for normal/low priority compute queues as well,
6931 * to get the best QoS for high priority compute jobs.
6933 * amdgpu controls only the 1st ME (CS pipes 0-3).
6935 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
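/* skip this ring's own pipe so the queue requesting the limit is not throttled itself */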
6936 if (i != ring->pipe)
6937 gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6942 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6944 .early_init = gfx_v9_0_early_init,
6945 .late_init = gfx_v9_0_late_init,
6946 .sw_init = gfx_v9_0_sw_init,
6947 .sw_fini = gfx_v9_0_sw_fini,
6948 .hw_init = gfx_v9_0_hw_init,
6949 .hw_fini = gfx_v9_0_hw_fini,
6950 .suspend = gfx_v9_0_suspend,
6951 .resume = gfx_v9_0_resume,
6952 .is_idle = gfx_v9_0_is_idle,
6953 .wait_for_idle = gfx_v9_0_wait_for_idle,
6954 .soft_reset = gfx_v9_0_soft_reset,
6955 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6956 .set_powergating_state = gfx_v9_0_set_powergating_state,
6957 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6960 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6961 .type = AMDGPU_RING_TYPE_GFX,
6963 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6964 .support_64bit_ptrs = true,
6965 .vmhub = AMDGPU_GFXHUB_0,
6966 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6967 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6968 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6969 .emit_frame_size = /* 242 dwords maximum in total, assuming 16 IBs */
6971 7 + /* PIPELINE_SYNC */
6972 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6973 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6975 8 + /* FENCE for VM_FLUSH */
6976 20 + /* GDS switch */
6977 4 + /* double SWITCH_BUFFER;
6978 the first COND_EXEC jumps to the place just
6979 prior to this double SWITCH_BUFFER */
6987 8 + 8 + /* FENCE x2 */
6988 2 + /* SWITCH_BUFFER */
6989 7, /* gfx_v9_0_emit_mem_sync */
6990 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6991 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6992 .emit_fence = gfx_v9_0_ring_emit_fence,
6993 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6994 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6995 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6996 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6997 .test_ring = gfx_v9_0_ring_test_ring,
6998 .test_ib = gfx_v9_0_ring_test_ib,
6999 .insert_nop = amdgpu_ring_insert_nop,
7000 .pad_ib = amdgpu_ring_generic_pad_ib,
7001 .emit_switch_buffer = gfx_v9_ring_emit_sb,
7002 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7003 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7004 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
7005 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7006 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7007 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7008 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7009 .soft_recovery = gfx_v9_0_ring_soft_recovery,
7010 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7013 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7014 .type = AMDGPU_RING_TYPE_COMPUTE,
7016 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7017 .support_64bit_ptrs = true,
7018 .vmhub = AMDGPU_GFXHUB_0,
7019 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7020 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7021 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7023 20 + /* gfx_v9_0_ring_emit_gds_switch */
7024 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7025 5 + /* hdp invalidate */
7026 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7027 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7028 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7029 2 + /* gfx_v9_0_ring_emit_vm_flush */
7030 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7031 7 + /* gfx_v9_0_emit_mem_sync */
7032 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7033 15, /* 3 * 5 dwords for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7034 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7035 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7036 .emit_fence = gfx_v9_0_ring_emit_fence,
7037 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7038 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7039 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7040 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7041 .test_ring = gfx_v9_0_ring_test_ring,
7042 .test_ib = gfx_v9_0_ring_test_ib,
7043 .insert_nop = amdgpu_ring_insert_nop,
7044 .pad_ib = amdgpu_ring_generic_pad_ib,
7045 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7046 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7047 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7048 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7049 .emit_wave_limit = gfx_v9_0_emit_wave_limit,
7052 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7053 .type = AMDGPU_RING_TYPE_KIQ,
7055 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7056 .support_64bit_ptrs = true,
7057 .vmhub = AMDGPU_GFXHUB_0,
7058 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7059 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7060 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7062 20 + /* gfx_v9_0_ring_emit_gds_switch */
7063 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7064 5 + /* hdp invalidate */
7065 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7066 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7067 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7068 2 + /* gfx_v9_0_ring_emit_vm_flush */
7069 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7070 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7071 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7072 .test_ring = gfx_v9_0_ring_test_ring,
7073 .insert_nop = amdgpu_ring_insert_nop,
7074 .pad_ib = amdgpu_ring_generic_pad_ib,
7075 .emit_rreg = gfx_v9_0_ring_emit_rreg,
7076 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7077 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7078 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7081 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7085 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7087 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7088 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7090 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7091 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7094 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7095 .set = gfx_v9_0_set_eop_interrupt_state,
7096 .process = gfx_v9_0_eop_irq,
7099 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7100 .set = gfx_v9_0_set_priv_reg_fault_state,
7101 .process = gfx_v9_0_priv_reg_irq,
7104 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7105 .set = gfx_v9_0_set_priv_inst_fault_state,
7106 .process = gfx_v9_0_priv_inst_irq,
7109 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7110 .set = gfx_v9_0_set_cp_ecc_error_state,
7111 .process = amdgpu_gfx_cp_ecc_error_irq,
7115 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7117 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7118 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7120 adev->gfx.priv_reg_irq.num_types = 1;
7121 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7123 adev->gfx.priv_inst_irq.num_types = 1;
7124 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7126 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7127 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7130 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7132 switch (adev->ip_versions[GC_HWIP][0]) {
7133 case IP_VERSION(9, 0, 1):
7134 case IP_VERSION(9, 2, 1):
7135 case IP_VERSION(9, 4, 0):
7136 case IP_VERSION(9, 2, 2):
7137 case IP_VERSION(9, 1, 0):
7138 case IP_VERSION(9, 4, 1):
7139 case IP_VERSION(9, 3, 0):
7140 case IP_VERSION(9, 4, 2):
7141 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7148 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7150 /* init asic gds info */
7151 switch (adev->ip_versions[GC_HWIP][0]) {
7152 case IP_VERSION(9, 0, 1):
7153 case IP_VERSION(9, 2, 1):
7154 case IP_VERSION(9, 4, 0):
7155 adev->gds.gds_size = 0x10000;
7157 case IP_VERSION(9, 2, 2):
7158 case IP_VERSION(9, 1, 0):
7159 case IP_VERSION(9, 4, 1):
7160 adev->gds.gds_size = 0x1000;
7162 case IP_VERSION(9, 4, 2):
7163 /* aldebaran removed all the GDS internal memory;
7164 * the kernel only supports GWS opcodes such as barrier
7166 adev->gds.gds_size = 0;
7169 adev->gds.gds_size = 0x10000;
7173 switch (adev->ip_versions[GC_HWIP][0]) {
7174 case IP_VERSION(9, 0, 1):
7175 case IP_VERSION(9, 4, 0):
7176 adev->gds.gds_compute_max_wave_id = 0x7ff;
7178 case IP_VERSION(9, 2, 1):
7179 adev->gds.gds_compute_max_wave_id = 0x27f;
7181 case IP_VERSION(9, 2, 2):
7182 case IP_VERSION(9, 1, 0):
7183 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7184 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7186 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7188 case IP_VERSION(9, 4, 1):
7189 adev->gds.gds_compute_max_wave_id = 0xfff;
7191 case IP_VERSION(9, 4, 2):
7192 /* deprecated for Aldebaran, no usage at all */
7193 adev->gds.gds_compute_max_wave_id = 0;
7196 /* this really depends on the chip */
7197 adev->gds.gds_compute_max_wave_id = 0x7ff;
7201 adev->gds.gws_size = 64;
7202 adev->gds.oa_size = 16;
7205 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7213 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7214 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7216 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7219 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7223 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7224 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7226 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7227 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7229 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
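/* CUs not marked inactive in either register are the active ones */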
7231 return (~data) & mask;
7234 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7235 struct amdgpu_cu_info *cu_info)
7237 int i, j, k, counter, active_cu_number = 0;
7238 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7239 unsigned disable_masks[4 * 4];
7241 if (!adev || !cu_info)
7245 * 16 is the bitmap array size (4*4), which covers all gfx9 ASICs
7247 if (adev->gfx.config.max_shader_engines *
7248 adev->gfx.config.max_sh_per_se > 16)
7251 amdgpu_gfx_parse_disable_cu(disable_masks,
7252 adev->gfx.config.max_shader_engines,
7253 adev->gfx.config.max_sh_per_se);
7255 mutex_lock(&adev->grbm_idx_mutex);
7256 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7257 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7261 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7262 gfx_v9_0_set_user_cu_inactive_bitmap(
7263 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7264 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7267 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
7268 * a 4x4 array, which suits Vega ASICs with their
7269 * 4*2 SE/SH layout.
7270 * For Arcturus, however, the SE/SH layout changed to 8*1.
7271 * To minimize the impact, we keep it compatible
7272 * with the current bitmap array as below:
7273 * SE4,SH0 --> bitmap[0][1]
7274 * SE5,SH0 --> bitmap[1][1]
7275 * SE6,SH0 --> bitmap[2][1]
7276 * SE7,SH0 --> bitmap[3][1]
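 * e.g. SE5,SH0 gives i = 5, j = 0, so bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1],
 * matching the table above.
 */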
7278 cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7280 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7281 if (bitmap & mask) {
7282 if (counter < adev->gfx.config.max_cu_per_sh)
7288 active_cu_number += counter;
7290 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7291 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7294 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7295 mutex_unlock(&adev->grbm_idx_mutex);
7297 cu_info->number = active_cu_number;
7298 cu_info->ao_cu_mask = ao_cu_mask;
7299 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7304 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7306 .type = AMD_IP_BLOCK_TYPE_GFX,
7310 .funcs = &gfx_v9_0_ip_funcs,