2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
31 #include "amdgpu_gfx.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
49 #include "amdgpu_ras.h"
51 #define GFX9_NUM_GFX_RINGS 1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
56 #define mmPWR_MISC_CNTL_STATUS 0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
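/* Firmware images this IP block may request at init time; declaring them
 * with MODULE_FIRMWARE() lets userspace tooling pull them into the initramfs.
 */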
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
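/* Per-ASIC "golden" register settings: each entry names a GC register, a
 * field mask and the value to program within that mask during hw init.
 */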
107 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
110 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
111 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
112 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
113 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
115 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
116 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
117 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
118 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
119 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
120 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
121 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
122 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
123 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
124 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
125 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
126 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
127 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
147 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
148 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
149 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
150 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
155 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
157 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
158 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
159 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
160 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
161 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
162 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
163 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
164 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
165 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
168 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
170 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
171 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
172 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
173 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
174 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
175 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
176 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
177 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
178 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
179 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
180 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
181 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
182 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
183 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
184 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
185 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
186 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
187 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
188 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
189 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
190 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
191 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
192 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
193 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
198 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
199 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
200 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
201 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
202 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
203 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
204 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
209 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
210 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
211 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
212 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
213 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
214 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
215 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
216 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
217 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
218 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
219 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
220 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
221 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
222 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
223 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
224 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
225 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
226 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
227 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
232 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
233 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
234 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
239 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
240 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
241 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
242 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
243 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
244 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
245 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
246 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
247 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
248 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
249 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
250 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
251 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
252 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
253 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
254 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
257 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
259 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
260 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
261 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
262 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
263 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
264 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
265 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
266 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
267 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
268 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
269 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
270 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
271 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
276 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
281 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
282 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
283 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
288 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
293 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
294 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
295 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
308 struct amdgpu_cu_info *cu_info);
309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
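/* Apply the per-ASIC golden register sequences defined above. On Vega10 the
 * shared registers are skipped when the host is expected to have programmed
 * them already (amdgpu_virt_support_skip_setting).
 */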
313 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
315 switch (adev->asic_type) {
317 if (!amdgpu_virt_support_skip_setting(adev)) {
318 soc15_program_register_sequence(adev,
319 golden_settings_gc_9_0,
320 ARRAY_SIZE(golden_settings_gc_9_0));
321 soc15_program_register_sequence(adev,
322 golden_settings_gc_9_0_vg10,
323 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
327 soc15_program_register_sequence(adev,
328 golden_settings_gc_9_2_1,
329 ARRAY_SIZE(golden_settings_gc_9_2_1));
330 soc15_program_register_sequence(adev,
331 golden_settings_gc_9_2_1_vg12,
332 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
335 soc15_program_register_sequence(adev,
336 golden_settings_gc_9_0,
337 ARRAY_SIZE(golden_settings_gc_9_0));
338 soc15_program_register_sequence(adev,
339 golden_settings_gc_9_0_vg20,
340 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
343 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
344 ARRAY_SIZE(golden_settings_gc_9_1));
345 if (adev->rev_id >= 8)
346 soc15_program_register_sequence(adev,
347 golden_settings_gc_9_1_rv2,
348 ARRAY_SIZE(golden_settings_gc_9_1_rv2));
350 soc15_program_register_sequence(adev,
351 golden_settings_gc_9_1_rv1,
352 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
358 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
359 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
362 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
364 adev->gfx.scratch.num_reg = 8;
365 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
366 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
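/* Emit a WRITE_DATA packet that writes @val to register @reg; @wc requests
 * a write confirmation before the packet is considered complete.
 */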
369 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
370 bool wc, uint32_t reg, uint32_t val)
372 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
373 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
374 WRITE_DATA_DST_SEL(0) |
375 (wc ? WR_CONFIRM : 0));
376 amdgpu_ring_write(ring, reg);
377 amdgpu_ring_write(ring, 0);
378 amdgpu_ring_write(ring, val);
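/* Emit a WAIT_REG_MEM packet that polls a register or memory location until
 * (value & mask) == ref, using the given engine and poll interval.
 */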
381 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
382 int mem_space, int opt, uint32_t addr0,
383 uint32_t addr1, uint32_t ref, uint32_t mask,
386 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
387 amdgpu_ring_write(ring,
388 /* memory (1) or register (0) */
389 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
390 WAIT_REG_MEM_OPERATION(opt) | /* wait */
391 WAIT_REG_MEM_FUNCTION(3) | /* equal */
392 WAIT_REG_MEM_ENGINE(eng_sel)));
395 BUG_ON(addr0 & 0x3); /* Dword align */
396 amdgpu_ring_write(ring, addr0);
397 amdgpu_ring_write(ring, addr1);
398 amdgpu_ring_write(ring, ref);
399 amdgpu_ring_write(ring, mask);
400 amdgpu_ring_write(ring, inv); /* poll interval */
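/* Basic ring test: write a token to a scratch register through the ring and
 * poll until it appears, proving the CP is fetching and executing packets.
 */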
403 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
405 struct amdgpu_device *adev = ring->adev;
411 r = amdgpu_gfx_scratch_get(adev, &scratch);
415 WREG32(scratch, 0xCAFEDEAD);
416 r = amdgpu_ring_alloc(ring, 3);
418 goto error_free_scratch;
420 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
421 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
422 amdgpu_ring_write(ring, 0xDEADBEEF);
423 amdgpu_ring_commit(ring);
425 for (i = 0; i < adev->usec_timeout; i++) {
426 tmp = RREG32(scratch);
427 if (tmp == 0xDEADBEEF)
432 if (i >= adev->usec_timeout)
436 amdgpu_gfx_scratch_free(adev, scratch);
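/* IB test: submit an indirect buffer that writes a token to a writeback
 * slot, wait on the fence, then check that the value landed in memory.
 */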
440 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
442 struct amdgpu_device *adev = ring->adev;
444 struct dma_fence *f = NULL;
451 r = amdgpu_device_wb_get(adev, &index);
455 gpu_addr = adev->wb.gpu_addr + (index * 4);
456 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
457 memset(&ib, 0, sizeof(ib));
458 r = amdgpu_ib_get(adev, NULL, 16, &ib);
462 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
463 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
464 ib.ptr[2] = lower_32_bits(gpu_addr);
465 ib.ptr[3] = upper_32_bits(gpu_addr);
466 ib.ptr[4] = 0xDEADBEEF;
469 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
473 r = dma_fence_wait_timeout(f, false, timeout);
481 tmp = adev->wb.wb[index];
482 if (tmp == 0xDEADBEEF)
488 amdgpu_ib_free(adev, &ib, NULL);
491 amdgpu_device_wb_free(adev, index);
496 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
498 release_firmware(adev->gfx.pfp_fw);
499 adev->gfx.pfp_fw = NULL;
500 release_firmware(adev->gfx.me_fw);
501 adev->gfx.me_fw = NULL;
502 release_firmware(adev->gfx.ce_fw);
503 adev->gfx.ce_fw = NULL;
504 release_firmware(adev->gfx.rlc_fw);
505 adev->gfx.rlc_fw = NULL;
506 release_firmware(adev->gfx.mec_fw);
507 adev->gfx.mec_fw = NULL;
508 release_firmware(adev->gfx.mec2_fw);
509 adev->gfx.mec2_fw = NULL;
511 kfree(adev->gfx.rlc.register_list_format);
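/* For v2.1 RLC firmware headers, record the versions, offsets and sizes of
 * the save/restore list CNTL, GPM and SRM sub-images.
 */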
514 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
516 const struct rlc_firmware_header_v2_1 *rlc_hdr;
518 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
519 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
520 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
521 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
522 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
523 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
524 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
525 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
526 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
527 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
528 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
529 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
530 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
531 adev->gfx.rlc.reg_list_format_direct_reg_list_length =
532 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
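/* Flag whether the loaded ME/PFP/MEC firmware is recent enough to handle
 * register write-then-wait packets; the minimum versions differ per ASIC.
 */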
535 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
537 adev->gfx.me_fw_write_wait = false;
538 adev->gfx.mec_fw_write_wait = false;
540 switch (adev->asic_type) {
542 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
543 (adev->gfx.me_feature_version >= 42) &&
544 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
545 (adev->gfx.pfp_feature_version >= 42))
546 adev->gfx.me_fw_write_wait = true;
548 if ((adev->gfx.mec_fw_version >= 0x00000193) &&
549 (adev->gfx.mec_feature_version >= 42))
550 adev->gfx.mec_fw_write_wait = true;
553 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
554 (adev->gfx.me_feature_version >= 44) &&
555 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
556 (adev->gfx.pfp_feature_version >= 44))
557 adev->gfx.me_fw_write_wait = true;
559 if ((adev->gfx.mec_fw_version >= 0x00000196) &&
560 (adev->gfx.mec_feature_version >= 44))
561 adev->gfx.mec_fw_write_wait = true;
564 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
565 (adev->gfx.me_feature_version >= 44) &&
566 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
567 (adev->gfx.pfp_feature_version >= 44))
568 adev->gfx.me_fw_write_wait = true;
570 if ((adev->gfx.mec_fw_version >= 0x00000197) &&
571 (adev->gfx.mec_feature_version >= 44))
572 adev->gfx.mec_fw_write_wait = true;
575 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
576 (adev->gfx.me_feature_version >= 42) &&
577 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
578 (adev->gfx.pfp_feature_version >= 42))
579 adev->gfx.me_fw_write_wait = true;
581 if ((adev->gfx.mec_fw_version >= 0x00000192) &&
582 (adev->gfx.mec_feature_version >= 42))
583 adev->gfx.mec_fw_write_wait = true;
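/* Disable GFXOFF on Raven parts whose RLC firmware is too old (or a known
 * bad version) to support it reliably.
 */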
590 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
592 switch (adev->asic_type) {
598 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
600 if ((adev->gfx.rlc_fw_version != 106 &&
601 adev->gfx.rlc_fw_version < 531) ||
602 (adev->gfx.rlc_fw_version == 53815) ||
603 (adev->gfx.rlc_feature_version < 1) ||
604 !adev->gfx.rlc.is_rlc_v2_1)
605 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
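/* Request and validate the PFP, ME, CE, RLC, MEC and (optional) MEC2
 * firmware for this ASIC, parse their headers, and register each image for
 * PSP front-door loading when that load type is in use.
 */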
612 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
614 const char *chip_name;
617 struct amdgpu_firmware_info *info = NULL;
618 const struct common_firmware_header *header = NULL;
619 const struct gfx_firmware_header_v1_0 *cp_hdr;
620 const struct rlc_firmware_header_v2_0 *rlc_hdr;
621 unsigned int *tmp = NULL;
623 uint16_t version_major;
624 uint16_t version_minor;
625 uint32_t smu_version;
629 switch (adev->asic_type) {
631 chip_name = "vega10";
634 chip_name = "vega12";
637 chip_name = "vega20";
640 if (adev->rev_id >= 8)
641 chip_name = "raven2";
642 else if (adev->pdev->device == 0x15d8)
643 chip_name = "picasso";
651 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
652 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
655 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
658 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
659 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
660 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
662 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
663 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
666 err = amdgpu_ucode_validate(adev->gfx.me_fw);
669 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
670 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
671 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
673 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
674 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
677 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
680 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
681 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
682 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
685 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin

686 * instead of picasso_rlc.bin.
688 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
689 * or revision >= 0xD8 && revision <= 0xDF
690 * otherwise it is PCO FP5
692 if (!strcmp(chip_name, "picasso") &&
693 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
694 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
695 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
696 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
697 (smu_version >= 0x41e2b))
699 * SMC is loaded by SBIOS on APU and it is able to get the SMU version directly.
701 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
703 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
704 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
707 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
708 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
710 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
711 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
712 if (version_major == 2 && version_minor == 1)
713 adev->gfx.rlc.is_rlc_v2_1 = true;
715 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
716 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
717 adev->gfx.rlc.save_and_restore_offset =
718 le32_to_cpu(rlc_hdr->save_and_restore_offset);
719 adev->gfx.rlc.clear_state_descriptor_offset =
720 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
721 adev->gfx.rlc.avail_scratch_ram_locations =
722 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
723 adev->gfx.rlc.reg_restore_list_size =
724 le32_to_cpu(rlc_hdr->reg_restore_list_size);
725 adev->gfx.rlc.reg_list_format_start =
726 le32_to_cpu(rlc_hdr->reg_list_format_start);
727 adev->gfx.rlc.reg_list_format_separate_start =
728 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
729 adev->gfx.rlc.starting_offsets_start =
730 le32_to_cpu(rlc_hdr->starting_offsets_start);
731 adev->gfx.rlc.reg_list_format_size_bytes =
732 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
733 adev->gfx.rlc.reg_list_size_bytes =
734 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
735 adev->gfx.rlc.register_list_format =
736 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
737 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
738 if (!adev->gfx.rlc.register_list_format) {
743 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
744 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
745 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
746 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
748 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
750 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
751 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
752 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
753 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
755 if (adev->gfx.rlc.is_rlc_v2_1)
756 gfx_v9_0_init_rlc_ext_microcode(adev);
758 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
759 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
762 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
765 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
766 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
767 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
770 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
771 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
773 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
776 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
777 adev->gfx.mec2_fw->data;
778 adev->gfx.mec2_fw_version =
779 le32_to_cpu(cp_hdr->header.ucode_version);
780 adev->gfx.mec2_feature_version =
781 le32_to_cpu(cp_hdr->ucode_feature_version);
784 adev->gfx.mec2_fw = NULL;
787 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
788 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
789 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
790 info->fw = adev->gfx.pfp_fw;
791 header = (const struct common_firmware_header *)info->fw->data;
792 adev->firmware.fw_size +=
793 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
795 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
796 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
797 info->fw = adev->gfx.me_fw;
798 header = (const struct common_firmware_header *)info->fw->data;
799 adev->firmware.fw_size +=
800 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
802 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
803 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
804 info->fw = adev->gfx.ce_fw;
805 header = (const struct common_firmware_header *)info->fw->data;
806 adev->firmware.fw_size +=
807 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
809 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
810 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
811 info->fw = adev->gfx.rlc_fw;
812 header = (const struct common_firmware_header *)info->fw->data;
813 adev->firmware.fw_size +=
814 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
816 if (adev->gfx.rlc.is_rlc_v2_1 &&
817 adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
818 adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
819 adev->gfx.rlc.save_restore_list_srm_size_bytes) {
820 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
821 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
822 info->fw = adev->gfx.rlc_fw;
823 adev->firmware.fw_size +=
824 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
826 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
827 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
828 info->fw = adev->gfx.rlc_fw;
829 adev->firmware.fw_size +=
830 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
832 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
833 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
834 info->fw = adev->gfx.rlc_fw;
835 adev->firmware.fw_size +=
836 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
839 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
840 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
841 info->fw = adev->gfx.mec_fw;
842 header = (const struct common_firmware_header *)info->fw->data;
843 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
844 adev->firmware.fw_size +=
845 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
847 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
848 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
849 info->fw = adev->gfx.mec_fw;
850 adev->firmware.fw_size +=
851 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
853 if (adev->gfx.mec2_fw) {
854 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
855 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
856 info->fw = adev->gfx.mec2_fw;
857 header = (const struct common_firmware_header *)info->fw->data;
858 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
859 adev->firmware.fw_size +=
860 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
861 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
862 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
863 info->fw = adev->gfx.mec2_fw;
864 adev->firmware.fw_size +=
865 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
871 gfx_v9_0_check_if_need_gfxoff(adev);
872 gfx_v9_0_check_fw_write_wait(adev);
875 "gfx9: Failed to load firmware \"%s\"\n",
877 release_firmware(adev->gfx.pfp_fw);
878 adev->gfx.pfp_fw = NULL;
879 release_firmware(adev->gfx.me_fw);
880 adev->gfx.me_fw = NULL;
881 release_firmware(adev->gfx.ce_fw);
882 adev->gfx.ce_fw = NULL;
883 release_firmware(adev->gfx.rlc_fw);
884 adev->gfx.rlc_fw = NULL;
885 release_firmware(adev->gfx.mec_fw);
886 adev->gfx.mec_fw = NULL;
887 release_firmware(adev->gfx.mec2_fw);
888 adev->gfx.mec2_fw = NULL;
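/* Return the number of dwords needed for the clear-state buffer. */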
893 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
896 const struct cs_section_def *sect = NULL;
897 const struct cs_extent_def *ext = NULL;
899 /* begin clear state */
901 /* context control state */
904 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
905 for (ext = sect->section; ext->extent != NULL; ++ext) {
906 if (sect->id == SECT_CONTEXT)
907 count += 2 + ext->reg_count;
913 /* end clear state */
921 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
922 volatile u32 *buffer)
925 const struct cs_section_def *sect = NULL;
926 const struct cs_extent_def *ext = NULL;
928 if (adev->gfx.rlc.cs_data == NULL)
933 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
934 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
936 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
937 buffer[count++] = cpu_to_le32(0x80000000);
938 buffer[count++] = cpu_to_le32(0x80000000);
940 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
941 for (ext = sect->section; ext->extent != NULL; ++ext) {
942 if (sect->id == SECT_CONTEXT) {
944 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
945 buffer[count++] = cpu_to_le32(ext->reg_index -
946 PACKET3_SET_CONTEXT_REG_START);
947 for (i = 0; i < ext->reg_count; i++)
948 buffer[count++] = cpu_to_le32(ext->extent[i]);
955 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
956 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
958 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
959 buffer[count++] = cpu_to_le32(0);
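/* Build and program the per-SE/SH masks of CUs that stay powered
 * ("always on"); the number of such CUs depends on the ASIC.
 */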
962 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
964 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
965 uint32_t pg_always_on_cu_num = 2;
966 uint32_t always_on_cu_num;
968 uint32_t mask, cu_bitmap, counter;
970 if (adev->flags & AMD_IS_APU)
971 always_on_cu_num = 4;
972 else if (adev->asic_type == CHIP_VEGA12)
973 always_on_cu_num = 8;
975 always_on_cu_num = 12;
977 mutex_lock(&adev->grbm_idx_mutex);
978 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
979 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
983 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
985 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
986 if (cu_info->bitmap[i][j] & mask) {
987 if (counter == pg_always_on_cu_num)
988 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
989 if (counter < always_on_cu_num)
998 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
999 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1002 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1003 mutex_unlock(&adev->grbm_idx_mutex);
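/* Program the RLC load-balancing (LBPW) thresholds and parameters for
 * Raven-class parts.
 */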
1006 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1010 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1011 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1012 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1013 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1014 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1016 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1017 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1019 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1020 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1022 mutex_lock(&adev->grbm_idx_mutex);
1023 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1024 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1025 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1027 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1028 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1029 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1030 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1031 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1033 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1034 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1037 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1040 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1041 * programmed in gfx_v9_0_init_always_on_cu_mask()
1044 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1045 * but used for RLC_LB_CNTL configuration */
1046 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1047 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1048 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1049 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1050 mutex_unlock(&adev->grbm_idx_mutex);
1052 gfx_v9_0_init_always_on_cu_mask(adev);
1055 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1059 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1060 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1061 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1062 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1063 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1065 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1066 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1068 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1069 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1071 mutex_lock(&adev->grbm_idx_mutex);
1072 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1073 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1074 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1076 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1077 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1078 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1079 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1080 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1082 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1083 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1086 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1089 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1090 * programmed in gfx_v9_0_init_always_on_cu_mask()
1093 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1094 * but used for RLC_LB_CNTL configuration */
1095 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1096 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1097 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1098 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1099 mutex_unlock(&adev->grbm_idx_mutex);
1101 gfx_v9_0_init_always_on_cu_mask(adev);
1104 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1106 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1109 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1114 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1116 const struct cs_section_def *cs_data;
1119 adev->gfx.rlc.cs_data = gfx9_cs_data;
1121 cs_data = adev->gfx.rlc.cs_data;
1124 /* init clear state block */
1125 r = amdgpu_gfx_rlc_init_csb(adev);
1130 if (adev->asic_type == CHIP_RAVEN) {
1131 /* TODO: double check the cp_table_size for RV */
1132 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1133 r = amdgpu_gfx_rlc_init_cpt(adev);
1138 switch (adev->asic_type) {
1140 gfx_v9_0_init_lbpw(adev);
1143 gfx_v9_4_init_lbpw(adev);
1152 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1156 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1157 if (unlikely(r != 0))
1160 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1161 AMDGPU_GEM_DOMAIN_VRAM);
1163 adev->gfx.rlc.clear_state_gpu_addr =
1164 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1166 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1171 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1175 if (!adev->gfx.rlc.clear_state_obj)
1178 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1179 if (likely(r == 0)) {
1180 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1181 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1185 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1187 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1188 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
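/* Allocate the MEC HPD/EOP buffer used by the compute rings and copy the
 * MEC firmware into a GTT buffer object.
 */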
1191 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1195 const __le32 *fw_data;
1198 size_t mec_hpd_size;
1200 const struct gfx_firmware_header_v1_0 *mec_hdr;
1202 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1204 /* take ownership of the relevant compute queues */
1205 amdgpu_gfx_compute_queue_acquire(adev);
1206 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1208 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1209 AMDGPU_GEM_DOMAIN_VRAM,
1210 &adev->gfx.mec.hpd_eop_obj,
1211 &adev->gfx.mec.hpd_eop_gpu_addr,
1214 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1215 gfx_v9_0_mec_fini(adev);
1219 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1221 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1222 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1224 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1226 fw_data = (const __le32 *)
1227 (adev->gfx.mec_fw->data +
1228 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1229 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1231 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1232 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1233 &adev->gfx.mec.mec_fw_obj,
1234 &adev->gfx.mec.mec_fw_gpu_addr,
1237 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1238 gfx_v9_0_mec_fini(adev);
1242 memcpy(fw, fw_data, fw_size);
1244 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1245 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
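/* Read one SQ per-wave register through the SQ_IND_INDEX/SQ_IND_DATA
 * indirect interface.
 */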
1250 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1252 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1253 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1254 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1255 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1256 (SQ_IND_INDEX__FORCE_READ_MASK));
1257 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1260 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1261 uint32_t wave, uint32_t thread,
1262 uint32_t regno, uint32_t num, uint32_t *out)
1264 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1265 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1266 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1267 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1268 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1269 (SQ_IND_INDEX__FORCE_READ_MASK) |
1270 (SQ_IND_INDEX__AUTO_INCR_MASK));
1272 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1275 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1277 /* type 1 wave data */
1278 dst[(*no_fields)++] = 1;
1279 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1280 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1281 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1282 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1283 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1284 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1285 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1286 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1287 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1288 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1289 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1290 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1291 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1292 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1295 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1296 uint32_t wave, uint32_t start,
1297 uint32_t size, uint32_t *dst)
1300 adev, simd, wave, 0,
1301 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1304 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1305 uint32_t wave, uint32_t thread,
1306 uint32_t start, uint32_t size,
1310 adev, simd, wave, thread,
1311 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1314 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1315 u32 me, u32 pipe, u32 q)
1317 soc15_grbm_select(adev, me, pipe, q, 0);
1320 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1321 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1322 .select_se_sh = &gfx_v9_0_select_se_sh,
1323 .read_wave_data = &gfx_v9_0_read_wave_data,
1324 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1325 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1326 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
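/* Fill in the per-ASIC gfx configuration and decode GB_ADDR_CONFIG into
 * its individual fields.
 */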
1329 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1334 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1336 switch (adev->asic_type) {
1338 adev->gfx.config.max_hw_contexts = 8;
1339 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1340 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1341 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1342 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1343 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1346 adev->gfx.config.max_hw_contexts = 8;
1347 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1348 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1349 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1350 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1351 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1352 DRM_INFO("fix gfx.config for vega12\n");
1355 adev->gfx.config.max_hw_contexts = 8;
1356 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1357 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1358 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1359 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1360 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1361 gb_addr_config &= ~0xf3e777ff;
1362 gb_addr_config |= 0x22014042;
1363 /* check vbios table if gpu info is not available */
1364 err = amdgpu_atomfirmware_get_gfx_info(adev);
1369 adev->gfx.config.max_hw_contexts = 8;
1370 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1371 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1372 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1373 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1374 if (adev->rev_id >= 8)
1375 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1377 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1384 adev->gfx.config.gb_addr_config = gb_addr_config;
1386 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1388 adev->gfx.config.gb_addr_config,
1392 adev->gfx.config.max_tile_pipes =
1393 adev->gfx.config.gb_addr_config_fields.num_pipes;
1395 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1397 adev->gfx.config.gb_addr_config,
1400 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1402 adev->gfx.config.gb_addr_config,
1404 MAX_COMPRESSED_FRAGS);
1405 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1407 adev->gfx.config.gb_addr_config,
1410 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1412 adev->gfx.config.gb_addr_config,
1414 NUM_SHADER_ENGINES);
1415 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1417 adev->gfx.config.gb_addr_config,
1419 PIPE_INTERLEAVE_SIZE));
1424 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1425 struct amdgpu_ngg_buf *ngg_buf,
1427 int default_size_se)
1432 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1435 size_se = size_se ? size_se : default_size_se;
1437 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1438 r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1439 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1444 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1447 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1452 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1456 for (i = 0; i < NGG_BUF_MAX; i++)
1457 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1458 &adev->gfx.ngg.buf[i].gpu_addr,
1461 memset(&adev->gfx.ngg.buf[0], 0,
1462 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1464 adev->gfx.ngg.init = false;
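/* Allocate the NGG (next-generation geometry) buffers (primitive, position,
 * control sideband and, optionally, the parameter cache) and carve out the
 * reserved GDS region they rely on.
 */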
1469 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1473 if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1476 /* GDS reserved memory: 64-byte alignment */
1477 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1478 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1479 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1480 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1482 /* Primitive Buffer */
1483 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1484 amdgpu_prim_buf_per_se,
1487 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1491 /* Position Buffer */
1492 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1493 amdgpu_pos_buf_per_se,
1496 dev_err(adev->dev, "Failed to create Position Buffer\n");
1500 /* Control Sideband */
1501 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1502 amdgpu_cntl_sb_buf_per_se,
1505 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1509 /* Parameter Cache, not created by default */
1510 if (amdgpu_param_buf_per_se <= 0)
1513 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1514 amdgpu_param_buf_per_se,
1517 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1522 adev->gfx.ngg.init = true;
1525 gfx_v9_0_ngg_fini(adev);
1529 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1531 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1538 /* Program buffer size */
1539 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1540 adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1541 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1542 adev->gfx.ngg.buf[NGG_POS].size >> 8);
1543 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1545 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1546 adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1547 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1548 adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1549 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1551 /* Program buffer base address */
1552 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1553 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1554 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1556 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1557 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1558 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1560 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1561 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1562 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1564 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1565 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1566 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1568 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1569 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1570 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1572 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1573 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1574 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1576 /* Clear GDS reserved memory */
1577 r = amdgpu_ring_alloc(ring, 17);
1579 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1584 gfx_v9_0_write_data_to_reg(ring, 0, false,
1585 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1586 (adev->gds.gds_size +
1587 adev->gfx.ngg.gds_reserve_size));
1589 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1590 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1591 PACKET3_DMA_DATA_DST_SEL(1) |
1592 PACKET3_DMA_DATA_SRC_SEL(2)));
1593 amdgpu_ring_write(ring, 0);
1594 amdgpu_ring_write(ring, 0);
1595 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1596 amdgpu_ring_write(ring, 0);
1597 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1598 adev->gfx.ngg.gds_reserve_size);
1600 gfx_v9_0_write_data_to_reg(ring, 0, false,
1601 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1603 amdgpu_ring_commit(ring);
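/* Set up one compute ring: doorbell index, EOP address within the HPD
 * buffer, ring name and the EOP interrupt source it maps to.
 */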
1608 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1609 int mec, int pipe, int queue)
1613 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1615 ring = &adev->gfx.compute_ring[ring_id];
1620 ring->queue = queue;
1622 ring->ring_obj = NULL;
1623 ring->use_doorbell = true;
1624 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1625 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1626 + (ring_id * GFX9_MEC_HPD_SIZE);
1627 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1629 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1630 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1633 /* type-2 packets are deprecated on MEC, use type-3 instead */
1634 r = amdgpu_ring_init(adev, ring, 1024,
1635 &adev->gfx.eop_irq, irq_type);
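/* sw_init: register interrupt sources, load microcode, create the RLC, MEC
 * and KIQ objects, and initialize the gfx and compute rings.
 */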
1643 static int gfx_v9_0_sw_init(void *handle)
1645 int i, j, k, r, ring_id;
1646 struct amdgpu_ring *ring;
1647 struct amdgpu_kiq *kiq;
1648 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650 switch (adev->asic_type) {
1655 adev->gfx.mec.num_mec = 2;
1658 adev->gfx.mec.num_mec = 1;
1662 adev->gfx.mec.num_pipe_per_mec = 4;
1663 adev->gfx.mec.num_queue_per_pipe = 8;
1666 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1670 /* Privileged reg */
1671 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1672 &adev->gfx.priv_reg_irq);
1676 /* Privileged inst */
1677 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1678 &adev->gfx.priv_inst_irq);
1683 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1684 &adev->gfx.cp_ecc_error_irq);
1689 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1690 &adev->gfx.cp_ecc_error_irq);
1694 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1696 gfx_v9_0_scratch_init(adev);
1698 r = gfx_v9_0_init_microcode(adev);
1700 DRM_ERROR("Failed to load gfx firmware!\n");
1704 r = adev->gfx.rlc.funcs->init(adev);
1706 DRM_ERROR("Failed to init rlc BOs!\n");
1710 r = gfx_v9_0_mec_init(adev);
1712 DRM_ERROR("Failed to init MEC BOs!\n");
1716 /* set up the gfx ring */
1717 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1718 ring = &adev->gfx.gfx_ring[i];
1719 ring->ring_obj = NULL;
1721 sprintf(ring->name, "gfx");
1723 sprintf(ring->name, "gfx_%d", i);
1724 ring->use_doorbell = true;
1725 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1726 r = amdgpu_ring_init(adev, ring, 1024,
1727 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1732 /* set up the compute queues - allocate horizontally across pipes */
1734 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1735 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1736 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1737 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1740 r = gfx_v9_0_compute_ring_init(adev,
1751 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1753 DRM_ERROR("Failed to init KIQ BOs!\n");
1757 kiq = &adev->gfx.kiq;
1758 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1762 	/* create MQD for all compute queues as well as KIQ for the SRIOV case */
1763 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1767 adev->gfx.ce_ram_size = 0x8000;
1769 r = gfx_v9_0_gpu_early_init(adev);
1773 r = gfx_v9_0_ngg_init(adev);
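/*
 * Undo sw_init in roughly reverse order: detach the GFX RAS state, tear
 * down the gfx and compute rings, free the MQDs and the KIQ, release the
 * MEC/NGG buffers, the clear state object and (on Raven) the RLC cp_table,
 * and finally drop the cached microcode.
 */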
1781 static int gfx_v9_0_sw_fini(void *handle)
1784 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1788 struct ras_common_if *ras_if = adev->gfx.ras_if;
1789 struct ras_ih_if ih_info = {
1793 amdgpu_ras_debugfs_remove(adev, ras_if);
1794 amdgpu_ras_sysfs_remove(adev, ras_if);
1795 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1796 amdgpu_ras_feature_enable(adev, ras_if, 0);
1800 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1801 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1802 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1803 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805 amdgpu_gfx_compute_mqd_sw_fini(adev);
1806 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1807 amdgpu_gfx_kiq_fini(adev);
1809 gfx_v9_0_mec_fini(adev);
1810 gfx_v9_0_ngg_fini(adev);
1811 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1812 if (adev->asic_type == CHIP_RAVEN) {
1813 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1814 &adev->gfx.rlc.cp_table_gpu_addr,
1815 (void **)&adev->gfx.rlc.cp_table_ptr);
1817 gfx_v9_0_free_microcode(adev);
1823 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
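/*
 * Steer subsequent GRBM register accesses to one shader engine / shader
 * array / instance via GRBM_GFX_INDEX.  A field value of 0xffffffff selects
 * broadcast mode for that field, which is how callers restore
 * "write everywhere" once a per-SE/SH walk is done.
 */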
1828 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1832 if (instance == 0xffffffff)
1833 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1835 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1837 if (se_num == 0xffffffff)
1838 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1840 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1842 if (sh_num == 0xffffffff)
1843 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1845 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1847 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1850 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1854 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1855 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1857 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1858 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1860 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1861 adev->gfx.config.max_sh_per_se);
1863 return (~data) & mask;
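/*
 * Walk every SE/SH pair, read which render backends survived harvesting,
 * and accumulate the result into one active-RB bitmap; the bitmap and its
 * popcount are cached in adev->gfx.config for later users.
 */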
1866 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1871 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1872 adev->gfx.config.max_sh_per_se;
1874 mutex_lock(&adev->grbm_idx_mutex);
1875 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1876 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1877 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1878 data = gfx_v9_0_get_rb_active_bitmap(adev);
1879 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1880 rb_bitmap_width_per_sh);
1883 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1884 mutex_unlock(&adev->grbm_idx_mutex);
1886 adev->gfx.config.backend_enable_mask = active_rbs;
1887 adev->gfx.config.num_rbs = hweight32(active_rbs);
1890 #define DEFAULT_SH_MEM_BASES (0x6000)
1891 #define FIRST_COMPUTE_VMID (8)
1892 #define LAST_COMPUTE_VMID (16)
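/*
 * Give the compute-reserved VMIDs (8..15) fixed SH_MEM apertures.  Packing
 * 0x6000 into both halves of SH_MEM_BASES puts the private and shared
 * apertures at 0x6000_0000_0000_0000 as the map below describes, so queues
 * running under these VMIDs have sane defaults before anything else
 * reprograms them.
 */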
1893 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1896 uint32_t sh_mem_config;
1897 uint32_t sh_mem_bases;
1900 * Configure apertures:
1901 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
1902 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
1903 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
1905 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1907 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1908 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1909 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1911 mutex_lock(&adev->srbm_mutex);
1912 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1913 soc15_grbm_select(adev, 0, 0, 0, i);
1914 /* CP and shaders */
1915 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1916 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1918 soc15_grbm_select(adev, 0, 0, 0, 0);
1919 mutex_unlock(&adev->srbm_mutex);
1922 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1927 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1929 gfx_v9_0_tiling_mode_table_init(adev);
1931 gfx_v9_0_setup_rb(adev);
1932 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1933 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1935 /* XXX SH_MEM regs */
1936 /* where to put LDS, scratch, GPUVM in FSA64 space */
1937 mutex_lock(&adev->srbm_mutex);
1938 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1939 soc15_grbm_select(adev, 0, 0, 0, i);
1940 /* CP and shaders */
1942 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1943 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1944 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1945 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1947 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1948 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1949 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1950 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1951 (adev->gmc.private_aperture_start >> 48));
1952 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1953 (adev->gmc.shared_aperture_start >> 48));
1954 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1957 soc15_grbm_select(adev, 0, 0, 0, 0);
1959 mutex_unlock(&adev->srbm_mutex);
1961 gfx_v9_0_init_compute_vmid(adev);
1963 mutex_lock(&adev->grbm_idx_mutex);
1965 	 * making sure that the following register writes will be broadcast
1966 	 * to all the shaders
1968 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1970 WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
1971 (adev->gfx.config.sc_prim_fifo_size_frontend <<
1972 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1973 (adev->gfx.config.sc_prim_fifo_size_backend <<
1974 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1975 (adev->gfx.config.sc_hiz_tile_fifo_size <<
1976 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1977 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1978 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1979 mutex_unlock(&adev->grbm_idx_mutex);
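/*
 * Wait (up to usec_timeout per step) for the RLC serdes masters to go
 * idle: first the per-CU masters on every SE/SH, then the non-CU masters
 * (SE/GC/TC0/TC1).  Used before halting the RLC so in-flight serdes
 * traffic can drain.
 */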
1983 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1988 mutex_lock(&adev->grbm_idx_mutex);
1989 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1990 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1991 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1992 for (k = 0; k < adev->usec_timeout; k++) {
1993 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1997 if (k == adev->usec_timeout) {
1998 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1999 0xffffffff, 0xffffffff);
2000 mutex_unlock(&adev->grbm_idx_mutex);
2001 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2007 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2008 mutex_unlock(&adev->grbm_idx_mutex);
2010 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2011 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2012 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2013 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2014 for (k = 0; k < adev->usec_timeout; k++) {
2015 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2021 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2024 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2026 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2027 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2028 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2029 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2031 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
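/*
 * Tell the RLC where the clear state buffer (CSB) lives: the high and low
 * halves of its GPU address plus its length, so golden context state can
 * be sourced from it when a CLEAR_STATE packet is executed.
 */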
2034 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2037 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2038 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2039 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2040 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2041 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2042 adev->gfx.rlc.clear_state_size);
2045 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2046 int indirect_offset,
2048 int *unique_indirect_regs,
2049 int unique_indirect_reg_count,
2050 int *indirect_start_offsets,
2051 int *indirect_start_offsets_count,
2052 int max_start_offsets_count)
2056 for (; indirect_offset < list_size; indirect_offset++) {
2057 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2058 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2059 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2061 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2062 indirect_offset += 2;
2064 			/* look for the matching index */
2065 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2066 if (unique_indirect_regs[idx] ==
2067 register_list_format[indirect_offset] ||
2068 !unique_indirect_regs[idx])
2072 BUG_ON(idx >= unique_indirect_reg_count);
2074 if (!unique_indirect_regs[idx])
2075 unique_indirect_regs[idx] = register_list_format[indirect_offset];
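/*
 * Program the RLC save/restore machinery from the lists packaged with the
 * RLC firmware: the register_restore table is streamed into SRM ARAM, the
 * direct and indirect register-list formats go into RLC GPM scratch, and
 * every unique indirect register is bound to one of the
 * RLC_SRM_INDEX_CNTL_ADDR/DATA pairs.  This is what lets GFXOFF/CGPG save
 * and restore GFX state across power gating.
 */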
2082 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2084 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2085 int unique_indirect_reg_count = 0;
2087 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2088 int indirect_start_offsets_count = 0;
2094 u32 *register_list_format =
2095 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2096 if (!register_list_format)
2098 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2099 adev->gfx.rlc.reg_list_format_size_bytes);
2101 /* setup unique_indirect_regs array and indirect_start_offsets array */
2102 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2103 gfx_v9_1_parse_ind_reg_list(register_list_format,
2104 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2105 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2106 unique_indirect_regs,
2107 unique_indirect_reg_count,
2108 indirect_start_offsets,
2109 &indirect_start_offsets_count,
2110 ARRAY_SIZE(indirect_start_offsets));
2112 /* enable auto inc in case it is disabled */
2113 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2114 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2115 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2117 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2118 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2119 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2120 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2121 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2122 adev->gfx.rlc.register_restore[i]);
2124 /* load indirect register */
2125 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2126 adev->gfx.rlc.reg_list_format_start);
2128 /* direct register portion */
2129 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2130 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2131 register_list_format[i]);
2133 /* indirect register portion */
2134 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2135 if (register_list_format[i] == 0xFFFFFFFF) {
2136 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2140 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2141 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2143 for (j = 0; j < unique_indirect_reg_count; j++) {
2144 if (register_list_format[i] == unique_indirect_regs[j]) {
2145 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2150 BUG_ON(j >= unique_indirect_reg_count);
2155 /* set save/restore list size */
2156 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2157 list_size = list_size >> 1;
2158 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2159 adev->gfx.rlc.reg_restore_list_size);
2160 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2162 /* write the starting offsets to RLC scratch ram */
2163 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2164 adev->gfx.rlc.starting_offsets_start);
2165 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2166 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2167 indirect_start_offsets[i]);
2169 /* load unique indirect regs*/
2170 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2171 if (unique_indirect_regs[i] != 0) {
2172 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2173 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2174 unique_indirect_regs[i] & 0x3FFFF);
2176 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2177 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2178 unique_indirect_regs[i] >> 20);
2182 kfree(register_list_format);
2186 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2188 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2191 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2195 uint32_t default_data = 0;
2197 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2198 	if (enable) {
2199 /* enable GFXIP control over CGPG */
2200 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2201 		if (default_data != data)
2202 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2205 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2206 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2207 		if (default_data != data)
2208 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2210 		/* restore GFXIP control over CGPG */
2211 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2212 		if (default_data != data)
2213 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
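/*
 * One-time GFX powergating setup, done only when some form of GFX PG is
 * supported: CP idle poll count, RLC power up/down and serdes command
 * delays, the CGCG-active-before-CGPG delay, the GRBM register-save idle
 * threshold, and finally handing CGPG control to the GFX IP through the
 * PWR block.
 */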
2217 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2221 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2222 AMD_PG_SUPPORT_GFX_SMG |
2223 AMD_PG_SUPPORT_GFX_DMG)) {
2224 		/* init IDLE_POLL_COUNT = 0x60 */
2225 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2226 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2227 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2228 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2230 /* init RLC PG Delay */
2232 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2233 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2234 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2235 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2236 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2238 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2239 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2240 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2241 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2243 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2244 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2245 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2246 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2248 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2249 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2251 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2252 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2253 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2255 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2259 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2263 uint32_t default_data = 0;
2265 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2266 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2267 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2269 if (default_data != data)
2270 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2273 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2277 uint32_t default_data = 0;
2279 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2280 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2281 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2283 	if (default_data != data)
2284 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2287 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2291 uint32_t default_data = 0;
2293 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2294 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2297 	if (default_data != data)
2298 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2301 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2304 uint32_t data, default_data;
2306 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2307 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2308 GFX_POWER_GATING_ENABLE,
2310 	if (default_data != data)
2311 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2314 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2317 uint32_t data, default_data;
2319 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2320 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2321 GFX_PIPELINE_PG_ENABLE,
2323 	if (default_data != data)
2324 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2327 /* read any GFX register to wake up GFX */
2328 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2331 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2334 uint32_t data, default_data;
2336 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2337 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2338 STATIC_PER_CU_PG_ENABLE,
2340 	if (default_data != data)
2341 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2344 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2347 uint32_t data, default_data;
2349 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2350 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2351 DYN_PER_CU_PG_ENABLE,
2353 	if (default_data != data)
2354 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2357 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2359 gfx_v9_0_init_csb(adev);
2362 	 * The RLC save/restore list is only usable since RLC v2_1,
2363 	 * and it is required by the GFXOFF feature.
2365 if (adev->gfx.rlc.is_rlc_v2_1) {
2366 gfx_v9_1_init_rlc_save_restore_list(adev);
2367 gfx_v9_0_enable_save_restore_machine(adev);
2370 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2371 AMD_PG_SUPPORT_GFX_SMG |
2372 AMD_PG_SUPPORT_GFX_DMG |
2374 AMD_PG_SUPPORT_GDS |
2375 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2376 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2377 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2378 gfx_v9_0_init_gfx_power_gating(adev);
2382 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2384 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2385 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2386 gfx_v9_0_wait_for_rlc_serdes(adev);
2389 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2391 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2393 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2397 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2399 #ifdef AMDGPU_RLC_DEBUG_RETRY
2403 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2406 	/* on APUs (e.g. Carrizo) the CP interrupt is enabled only after the CP is initialized */
2407 if (!(adev->flags & AMD_IS_APU)) {
2408 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2412 #ifdef AMDGPU_RLC_DEBUG_RETRY
2413 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2414 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2415 	if (rlc_ucode_ver == 0x108) {
2416 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2417 			 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2418 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2419 * default is 0x9C4 to create a 100us interval */
2420 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2421 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2422 		 * to disable the page fault retry interrupts, default is 0x100 */
2424 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
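/*
 * Legacy (non-PSP) RLC microcode load: copy the ucode image dword by dword
 * through RLC_GPM_UCODE_ADDR/DATA, starting at
 * RLCG_UCODE_LOADING_START_ADDRESS, and finish by writing the firmware
 * version back into the address register.
 */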
2429 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2431 const struct rlc_firmware_header_v2_0 *hdr;
2432 const __le32 *fw_data;
2433 unsigned i, fw_size;
2435 if (!adev->gfx.rlc_fw)
2438 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2439 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2441 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2442 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2443 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2445 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2446 RLCG_UCODE_LOADING_START_ADDRESS);
2447 for (i = 0; i < fw_size; i++)
2448 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2449 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2454 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2458 if (amdgpu_sriov_vf(adev)) {
2459 gfx_v9_0_init_csb(adev);
2463 adev->gfx.rlc.funcs->stop(adev);
2466 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2468 gfx_v9_0_init_pg(adev);
2470 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2471 /* legacy rlc firmware loading */
2472 r = gfx_v9_0_rlc_load_microcode(adev);
2477 switch (adev->asic_type) {
2479 if (amdgpu_lbpw == 0)
2480 gfx_v9_0_enable_lbpw(adev, false);
2482 gfx_v9_0_enable_lbpw(adev, true);
2485 if (amdgpu_lbpw > 0)
2486 gfx_v9_0_enable_lbpw(adev, true);
2488 gfx_v9_0_enable_lbpw(adev, false);
2494 adev->gfx.rlc.funcs->start(adev);
2499 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2502 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2504 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2505 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2506 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2508 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2509 adev->gfx.gfx_ring[i].sched.ready = false;
2511 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2515 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2517 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2518 const struct gfx_firmware_header_v1_0 *ce_hdr;
2519 const struct gfx_firmware_header_v1_0 *me_hdr;
2520 const __le32 *fw_data;
2521 unsigned i, fw_size;
2523 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2526 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2527 adev->gfx.pfp_fw->data;
2528 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2529 adev->gfx.ce_fw->data;
2530 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2531 adev->gfx.me_fw->data;
2533 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2534 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2535 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2537 gfx_v9_0_cp_gfx_enable(adev, false);
2540 fw_data = (const __le32 *)
2541 (adev->gfx.pfp_fw->data +
2542 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2543 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2544 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2545 for (i = 0; i < fw_size; i++)
2546 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2547 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2550 fw_data = (const __le32 *)
2551 (adev->gfx.ce_fw->data +
2552 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2553 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2554 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2555 for (i = 0; i < fw_size; i++)
2556 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2557 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2560 fw_data = (const __le32 *)
2561 (adev->gfx.me_fw->data +
2562 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2563 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2564 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2565 for (i = 0; i < fw_size; i++)
2566 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2567 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
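/*
 * Feed the gfx ring its initial command stream: preamble/context control,
 * the full golden context register state from gfx9_cs_data, a CLEAR_STATE
 * packet, the CE partition bases and the VGT index type; the ring
 * allocation in this function is sized (csb_size + 4 + 3 dwords)
 * accordingly.
 */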
2572 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2574 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2575 const struct cs_section_def *sect = NULL;
2576 const struct cs_extent_def *ext = NULL;
2580 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2581 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2583 gfx_v9_0_cp_gfx_enable(adev, true);
2585 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2587 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2591 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2592 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2594 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2595 amdgpu_ring_write(ring, 0x80000000);
2596 amdgpu_ring_write(ring, 0x80000000);
2598 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2599 for (ext = sect->section; ext->extent != NULL; ++ext) {
2600 if (sect->id == SECT_CONTEXT) {
2601 amdgpu_ring_write(ring,
2602 PACKET3(PACKET3_SET_CONTEXT_REG,
2604 amdgpu_ring_write(ring,
2605 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2606 for (i = 0; i < ext->reg_count; i++)
2607 amdgpu_ring_write(ring, ext->extent[i]);
2612 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2613 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2615 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2616 amdgpu_ring_write(ring, 0);
2618 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2619 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2620 amdgpu_ring_write(ring, 0x8000);
2621 amdgpu_ring_write(ring, 0x8000);
2623 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2624 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2625 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2626 amdgpu_ring_write(ring, tmp);
2627 amdgpu_ring_write(ring, 0);
2629 amdgpu_ring_commit(ring);
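/*
 * Bring up gfx ring buffer 0: program its size, rptr/wptr writeback
 * addresses, base address and doorbell range, then call
 * gfx_v9_0_cp_gfx_start() to submit the initial state and mark the ring
 * ready for the scheduler.
 */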
2634 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2636 struct amdgpu_ring *ring;
2639 u64 rb_addr, rptr_addr, wptr_gpu_addr;
2641 /* Set the write pointer delay */
2642 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2644 /* set the RB to use vmid 0 */
2645 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2647 /* Set ring buffer size */
2648 ring = &adev->gfx.gfx_ring[0];
2649 rb_bufsz = order_base_2(ring->ring_size / 8);
2650 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2651 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2653 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2655 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2657 /* Initialize the ring buffer's write pointers */
2659 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2660 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2662 	/* set the wb address whether it's enabled or not */
2663 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2664 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2665 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2667 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2668 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2669 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2672 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2674 rb_addr = ring->gpu_addr >> 8;
2675 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2676 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2678 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2679 if (ring->use_doorbell) {
2680 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2681 DOORBELL_OFFSET, ring->doorbell_index);
2682 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2685 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2687 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2689 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2690 DOORBELL_RANGE_LOWER, ring->doorbell_index);
2691 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2693 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2694 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2697 /* start the ring */
2698 gfx_v9_0_cp_gfx_start(adev);
2699 ring->sched.ready = true;
2704 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2709 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2711 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2712 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2713 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2714 adev->gfx.compute_ring[i].sched.ready = false;
2715 adev->gfx.kiq.ring.sched.ready = false;
2720 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2722 const struct gfx_firmware_header_v1_0 *mec_hdr;
2723 const __le32 *fw_data;
2727 if (!adev->gfx.mec_fw)
2730 gfx_v9_0_cp_compute_enable(adev, false);
2732 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2733 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2735 fw_data = (const __le32 *)
2736 (adev->gfx.mec_fw->data +
2737 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2739 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2740 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2741 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2743 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2744 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2745 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2746 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2749 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2750 mec_hdr->jt_offset);
2751 for (i = 0; i < mec_hdr->jt_size; i++)
2752 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2753 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2755 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2756 adev->gfx.mec_fw_version);
2757 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2763 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2766 struct amdgpu_device *adev = ring->adev;
2768 	/* tell the RLC which queue is the KIQ */
2769 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2771 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2772 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2774 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
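/*
 * Hand the compute queues to the hardware scheduler through the KIQ: one
 * SET_RESOURCES packet publishes the queue_mask derived from the MEC queue
 * bitmap, then a MAP_QUEUES packet per compute ring passes its doorbell,
 * MQD address and wptr poll address.  The space reserved on the KIQ ring
 * (7 dwords per MAP_QUEUES plus 8 for SET_RESOURCES) matches these packets.
 */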
2777 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2779 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2780 uint64_t queue_mask = 0;
2783 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2784 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2787 /* This situation may be hit in the future if a new HW
2788 * generation exposes more than 64 queues. If so, the
2789 * definition of queue_mask needs updating */
2790 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2791 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2795 queue_mask |= (1ull << i);
2798 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2800 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2805 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2806 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2807 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2808 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2809 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2810 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2811 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2812 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2813 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2814 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2815 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2816 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2817 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2819 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2820 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2821 		amdgpu_ring_write(kiq_ring,
2822 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2823 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2824 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2825 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2826 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2827 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2828 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2829 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2830 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2831 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2832 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2833 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2834 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2835 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2838 r = amdgpu_ring_test_helper(kiq_ring);
2840 DRM_ERROR("KCQ enable failed\n");
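/*
 * Fill the memory queue descriptor (MQD) that mirrors the CP_HQD_*
 * registers for a compute or KIQ queue: EOP buffer, doorbell control,
 * queue base/size, rptr report and wptr poll addresses.  Sizes are encoded
 * as log2(dwords) - 1, e.g. the 4KB EOP buffer is 1024 dwords, so EOP_SIZE
 * becomes order_base_2(1024) - 1 = 9.
 */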
2845 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2847 struct amdgpu_device *adev = ring->adev;
2848 struct v9_mqd *mqd = ring->mqd_ptr;
2849 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2852 mqd->header = 0xC0310800;
2853 mqd->compute_pipelinestat_enable = 0x00000001;
2854 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2855 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2856 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2857 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2858 mqd->compute_misc_reserved = 0x00000003;
2860 mqd->dynamic_cu_mask_addr_lo =
2861 lower_32_bits(ring->mqd_gpu_addr
2862 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2863 mqd->dynamic_cu_mask_addr_hi =
2864 upper_32_bits(ring->mqd_gpu_addr
2865 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2867 eop_base_addr = ring->eop_gpu_addr >> 8;
2868 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2869 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2871 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2872 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2873 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2874 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2876 mqd->cp_hqd_eop_control = tmp;
2878 /* enable doorbell? */
2879 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2881 if (ring->use_doorbell) {
2882 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883 DOORBELL_OFFSET, ring->doorbell_index);
2884 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2886 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2887 DOORBELL_SOURCE, 0);
2888 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2891 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2895 mqd->cp_hqd_pq_doorbell_control = tmp;
2897 /* disable the queue if it's active */
2899 mqd->cp_hqd_dequeue_request = 0;
2900 mqd->cp_hqd_pq_rptr = 0;
2901 mqd->cp_hqd_pq_wptr_lo = 0;
2902 mqd->cp_hqd_pq_wptr_hi = 0;
2904 /* set the pointer to the MQD */
2905 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2906 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2908 /* set MQD vmid to 0 */
2909 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2910 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2911 mqd->cp_mqd_control = tmp;
2913 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2914 hqd_gpu_addr = ring->gpu_addr >> 8;
2915 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2916 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2918 /* set up the HQD, this is similar to CP_RB0_CNTL */
2919 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2920 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2921 (order_base_2(ring->ring_size / 4) - 1));
2922 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2923 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2925 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2927 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2928 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2929 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2930 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2931 mqd->cp_hqd_pq_control = tmp;
2933 /* set the wb address whether it's enabled or not */
2934 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2935 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2936 mqd->cp_hqd_pq_rptr_report_addr_hi =
2937 upper_32_bits(wb_gpu_addr) & 0xffff;
2939 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2940 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2941 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2942 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2945 /* enable the doorbell if requested */
2946 if (ring->use_doorbell) {
2947 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2948 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2949 DOORBELL_OFFSET, ring->doorbell_index);
2951 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2953 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2954 DOORBELL_SOURCE, 0);
2955 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2959 mqd->cp_hqd_pq_doorbell_control = tmp;
2961 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2963 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2965 /* set the vmid for the queue */
2966 mqd->cp_hqd_vmid = 0;
2968 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2969 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2970 mqd->cp_hqd_persistent_state = tmp;
2972 /* set MIN_IB_AVAIL_SIZE */
2973 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2974 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2975 mqd->cp_hqd_ib_control = tmp;
2977 /* activate the queue */
2978 mqd->cp_hqd_active = 1;
2983 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2985 struct amdgpu_device *adev = ring->adev;
2986 struct v9_mqd *mqd = ring->mqd_ptr;
2989 /* disable wptr polling */
2990 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2992 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2993 mqd->cp_hqd_eop_base_addr_lo);
2994 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2995 mqd->cp_hqd_eop_base_addr_hi);
2997 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2998 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2999 mqd->cp_hqd_eop_control);
3001 /* enable doorbell? */
3002 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3003 mqd->cp_hqd_pq_doorbell_control);
3005 /* disable the queue if it's active */
3006 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3007 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3008 for (j = 0; j < adev->usec_timeout; j++) {
3009 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3013 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3014 mqd->cp_hqd_dequeue_request);
3015 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3016 mqd->cp_hqd_pq_rptr);
3017 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3018 mqd->cp_hqd_pq_wptr_lo);
3019 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3020 mqd->cp_hqd_pq_wptr_hi);
3023 /* set the pointer to the MQD */
3024 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3025 mqd->cp_mqd_base_addr_lo);
3026 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3027 mqd->cp_mqd_base_addr_hi);
3029 /* set MQD vmid to 0 */
3030 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3031 mqd->cp_mqd_control);
3033 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3034 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3035 mqd->cp_hqd_pq_base_lo);
3036 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3037 mqd->cp_hqd_pq_base_hi);
3039 /* set up the HQD, this is similar to CP_RB0_CNTL */
3040 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3041 mqd->cp_hqd_pq_control);
3043 /* set the wb address whether it's enabled or not */
3044 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3045 mqd->cp_hqd_pq_rptr_report_addr_lo);
3046 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3047 mqd->cp_hqd_pq_rptr_report_addr_hi);
3049 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3050 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3051 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3052 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3053 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3055 /* enable the doorbell if requested */
3056 if (ring->use_doorbell) {
3057 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3058 (adev->doorbell_index.kiq * 2) << 2);
3059 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3060 (adev->doorbell_index.userqueue_end * 2) << 2);
3063 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3064 mqd->cp_hqd_pq_doorbell_control);
3066 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3067 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3068 mqd->cp_hqd_pq_wptr_lo);
3069 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3070 mqd->cp_hqd_pq_wptr_hi);
3072 /* set the vmid for the queue */
3073 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3075 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3076 mqd->cp_hqd_persistent_state);
3078 /* activate the queue */
3079 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3080 mqd->cp_hqd_active);
3082 if (ring->use_doorbell)
3083 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3088 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3090 struct amdgpu_device *adev = ring->adev;
3093 /* disable the queue if it's active */
3094 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3096 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3098 for (j = 0; j < adev->usec_timeout; j++) {
3099 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3104 		if (j == adev->usec_timeout) {
3105 DRM_DEBUG("KIQ dequeue request failed.\n");
3107 /* Manual disable if dequeue request times out */
3108 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3111 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3115 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3116 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3117 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3118 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3119 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3120 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3121 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3122 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3127 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3129 struct amdgpu_device *adev = ring->adev;
3130 struct v9_mqd *mqd = ring->mqd_ptr;
3131 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3133 gfx_v9_0_kiq_setting(ring);
3135 if (adev->in_gpu_reset) { /* for GPU_RESET case */
3136 /* reset MQD to a clean status */
3137 if (adev->gfx.mec.mqd_backup[mqd_idx])
3138 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3140 /* reset ring buffer */
3142 amdgpu_ring_clear_ring(ring);
3144 mutex_lock(&adev->srbm_mutex);
3145 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3146 gfx_v9_0_kiq_init_register(ring);
3147 soc15_grbm_select(adev, 0, 0, 0, 0);
3148 mutex_unlock(&adev->srbm_mutex);
3150 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3151 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3152 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3153 mutex_lock(&adev->srbm_mutex);
3154 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3155 gfx_v9_0_mqd_init(ring);
3156 gfx_v9_0_kiq_init_register(ring);
3157 soc15_grbm_select(adev, 0, 0, 0, 0);
3158 mutex_unlock(&adev->srbm_mutex);
3160 if (adev->gfx.mec.mqd_backup[mqd_idx])
3161 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3167 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3169 struct amdgpu_device *adev = ring->adev;
3170 struct v9_mqd *mqd = ring->mqd_ptr;
3171 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3173 if (!adev->in_gpu_reset && !adev->in_suspend) {
3174 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3175 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3176 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3177 mutex_lock(&adev->srbm_mutex);
3178 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3179 gfx_v9_0_mqd_init(ring);
3180 soc15_grbm_select(adev, 0, 0, 0, 0);
3181 mutex_unlock(&adev->srbm_mutex);
3183 if (adev->gfx.mec.mqd_backup[mqd_idx])
3184 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3185 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3186 /* reset MQD to a clean status */
3187 if (adev->gfx.mec.mqd_backup[mqd_idx])
3188 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3190 /* reset ring buffer */
3192 amdgpu_ring_clear_ring(ring);
3194 amdgpu_ring_clear_ring(ring);
3200 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3202 struct amdgpu_ring *ring;
3205 ring = &adev->gfx.kiq.ring;
3207 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3208 if (unlikely(r != 0))
3211 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3212 if (unlikely(r != 0))
3215 gfx_v9_0_kiq_init_queue(ring);
3216 amdgpu_bo_kunmap(ring->mqd_obj);
3217 ring->mqd_ptr = NULL;
3218 amdgpu_bo_unreserve(ring->mqd_obj);
3219 ring->sched.ready = true;
3223 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3225 struct amdgpu_ring *ring = NULL;
3228 gfx_v9_0_cp_compute_enable(adev, true);
3230 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3231 ring = &adev->gfx.compute_ring[i];
3233 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3234 if (unlikely(r != 0))
3236 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3238 r = gfx_v9_0_kcq_init_queue(ring);
3239 amdgpu_bo_kunmap(ring->mqd_obj);
3240 ring->mqd_ptr = NULL;
3242 amdgpu_bo_unreserve(ring->mqd_obj);
3247 r = gfx_v9_0_kiq_kcq_enable(adev);
3252 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3255 struct amdgpu_ring *ring;
3257 if (!(adev->flags & AMD_IS_APU))
3258 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3260 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3261 /* legacy firmware loading */
3262 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3266 r = gfx_v9_0_cp_compute_load_microcode(adev);
3271 r = gfx_v9_0_kiq_resume(adev);
3275 r = gfx_v9_0_cp_gfx_resume(adev);
3279 r = gfx_v9_0_kcq_resume(adev);
3283 ring = &adev->gfx.gfx_ring[0];
3284 r = amdgpu_ring_test_helper(ring);
3288 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3289 ring = &adev->gfx.compute_ring[i];
3290 amdgpu_ring_test_helper(ring);
3293 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3298 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3300 gfx_v9_0_cp_gfx_enable(adev, enable);
3301 gfx_v9_0_cp_compute_enable(adev, enable);
3304 static int gfx_v9_0_hw_init(void *handle)
3307 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3309 gfx_v9_0_init_golden_registers(adev);
3311 gfx_v9_0_constants_init(adev);
3313 r = gfx_v9_0_csb_vram_pin(adev);
3317 r = adev->gfx.rlc.funcs->resume(adev);
3321 r = gfx_v9_0_cp_resume(adev);
3325 r = gfx_v9_0_ngg_en(adev);
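/*
 * Counterpart of gfx_v9_0_kiq_kcq_enable(): issue an UNMAP_QUEUES packet
 * (action RESET_QUEUES) per compute ring through the KIQ before teardown,
 * so the CPC stops referencing writeback memory that is about to go away.
 * Each UNMAP_QUEUES packet is 6 dwords, matching the ring allocation.
 */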
3332 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3335 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3337 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3339 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3341 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3342 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3344 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3345 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3346 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3347 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3348 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3349 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3350 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3351 amdgpu_ring_write(kiq_ring, 0);
3352 amdgpu_ring_write(kiq_ring, 0);
3353 amdgpu_ring_write(kiq_ring, 0);
3355 r = amdgpu_ring_test_helper(kiq_ring);
3357 DRM_ERROR("KCQ disable failed\n");
3362 static int gfx_v9_0_hw_fini(void *handle)
3364 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3366 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3367 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3368 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3370 	/* disable KCQs so the CPC stops touching memory that is no longer valid */
3371 gfx_v9_0_kcq_disable(adev);
3373 if (amdgpu_sriov_vf(adev)) {
3374 gfx_v9_0_cp_gfx_enable(adev, false);
3375 		/* must disable wptr polling for SRIOV when hw is finished, otherwise
3376 		 * the CPC engine may keep fetching the WB address, which is already
3377 		 * invalid once sw teardown is done, and trigger a DMAR read error */
3380 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3384 	/* Use the de-initialization sequence from CAIL when unbinding the device
3385 	 * from the driver, otherwise the KIQ hangs when it is bound back */
3387 if (!adev->in_gpu_reset && !adev->in_suspend) {
3388 mutex_lock(&adev->srbm_mutex);
3389 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3390 adev->gfx.kiq.ring.pipe,
3391 adev->gfx.kiq.ring.queue, 0);
3392 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3393 soc15_grbm_select(adev, 0, 0, 0, 0);
3394 mutex_unlock(&adev->srbm_mutex);
3397 gfx_v9_0_cp_enable(adev, false);
3398 adev->gfx.rlc.funcs->stop(adev);
3400 gfx_v9_0_csb_vram_unpin(adev);
3405 static int gfx_v9_0_suspend(void *handle)
3407 return gfx_v9_0_hw_fini(handle);
3410 static int gfx_v9_0_resume(void *handle)
3412 return gfx_v9_0_hw_init(handle);
3415 static bool gfx_v9_0_is_idle(void *handle)
3417 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3419 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3420 GRBM_STATUS, GUI_ACTIVE))
3426 static int gfx_v9_0_wait_for_idle(void *handle)
3429 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3431 for (i = 0; i < adev->usec_timeout; i++) {
3432 if (gfx_v9_0_is_idle(handle))
3439 static int gfx_v9_0_soft_reset(void *handle)
3441 u32 grbm_soft_reset = 0;
3443 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3446 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3447 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3448 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3449 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3450 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3451 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3452 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3453 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3454 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3455 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3456 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3459 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3460 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3461 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3465 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3466 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3467 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3468 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3471 if (grbm_soft_reset) {
3473 adev->gfx.rlc.funcs->stop(adev);
3475 /* Disable GFX parsing/prefetching */
3476 gfx_v9_0_cp_gfx_enable(adev, false);
3478 /* Disable MEC parsing/prefetching */
3479 gfx_v9_0_cp_compute_enable(adev, false);
3481 if (grbm_soft_reset) {
3482 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3483 tmp |= grbm_soft_reset;
3484 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3485 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3486 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3490 tmp &= ~grbm_soft_reset;
3491 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3492 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3495 /* Wait a little for things to settle down */
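/*
 * Read the free-running 64-bit GPU clock: writing
 * RLC_CAPTURE_GPU_CLOCK_COUNT latches the counter so the LSB/MSB reads are
 * coherent, and the mutex serializes concurrent captures.
 */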
3501 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3505 mutex_lock(&adev->gfx.gpu_clock_mutex);
3506 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3507 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3508 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3509 mutex_unlock(&adev->gfx.gpu_clock_mutex);
3513 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3515 uint32_t gds_base, uint32_t gds_size,
3516 uint32_t gws_base, uint32_t gws_size,
3517 uint32_t oa_base, uint32_t oa_size)
3519 struct amdgpu_device *adev = ring->adev;
3522 gfx_v9_0_write_data_to_reg(ring, 0, false,
3523 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3527 gfx_v9_0_write_data_to_reg(ring, 0, false,
3528 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3532 gfx_v9_0_write_data_to_reg(ring, 0, false,
3533 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3534 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3537 gfx_v9_0_write_data_to_reg(ring, 0, false,
3538 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3539 (1 << (oa_size + oa_base)) - (1 << oa_base));
3542 static const u32 vgpr_init_compute_shader[] =
3544 0xb07c0000, 0xbe8000ff,
3545 0x000000f8, 0xbf110800,
3546 0x7e000280, 0x7e020280,
3547 0x7e040280, 0x7e060280,
3548 0x7e080280, 0x7e0a0280,
3549 0x7e0c0280, 0x7e0e0280,
3550 0x80808800, 0xbe803200,
3551 0xbf84fff5, 0xbf9c0000,
3552 0xd28c0001, 0x0001007f,
3553 0xd28d0001, 0x0002027e,
3554 0x10020288, 0xb8810904,
3555 0xb7814000, 0xd1196a01,
3556 0x00000301, 0xbe800087,
3557 0xbefc00c1, 0xd89c4000,
3558 0x00020201, 0xd89cc080,
3559 0x00040401, 0x320202ff,
3560 0x00000800, 0x80808100,
3561 0xbf84fff8, 0x7e020280,
3562 0xbf810000, 0x00000000,
3565 static const u32 sgpr_init_compute_shader[] =
3567 0xb07c0000, 0xbe8000ff,
3568 0x0000005f, 0xbee50080,
3569 0xbe812c65, 0xbe822c65,
3570 0xbe832c65, 0xbe842c65,
3571 0xbe852c65, 0xb77c0005,
3572 0x80808500, 0xbf84fff8,
3573 0xbe800080, 0xbf810000,
3576 static const struct soc15_reg_entry vgpr_init_regs[] = {
3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3582 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3583 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3585    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3586 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
3589 static const struct soc15_reg_entry sgpr_init_regs[] = {
3590 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3591 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3592 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3593 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3594 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3595 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3596 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3597 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3598 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3599 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3602 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3603 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
3604 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
3605 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
3606 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
3607 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
3608 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
3609 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
3610 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
3611 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
3612 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
3613 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
3614 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
3615 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
3616 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
3617 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
3618 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
3619 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
3620 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
3621 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
3622 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
3623 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
3624 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
3625 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
3626 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
3627 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
3628 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
3629 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
3630 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
3631 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
3632 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
3633 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
3636 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3638 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3639 struct amdgpu_ib ib;
3640 struct dma_fence *f = NULL;
3642 unsigned total_size, vgpr_offset, sgpr_offset;
3645 /* only supported when RAS is enabled */
3646 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3649 /* bail if the compute ring is not ready */
3650 if (!ring->sched.ready)
3654 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3656 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3657 total_size = ALIGN(total_size, 256);
3658 vgpr_offset = total_size;
3659 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3660 sgpr_offset = total_size;
3661 total_size += sizeof(sgpr_init_compute_shader);
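/* IB layout: the PM4 packets for the two dispatches come first, followed by
 * the VGPR and SGPR init shader binaries at 256-byte aligned offsets */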
3663 /* allocate an indirect buffer to put the commands in */
3664 memset(&ib, 0, sizeof(ib));
3665 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3667 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3671 /* load the compute shaders */
3672 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3673 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3675 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3676 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3678 /* init the ib length to 0 */
3682 /* write the register state for the compute dispatch */
3683 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3684 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3685 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3686 - PACKET3_SET_SH_REG_START;
3687 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3689 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3690 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3691 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3692 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3693 - PACKET3_SET_SH_REG_START;
3694 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3695 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3697 /* write dispatch packet */
3698 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3699 ib.ptr[ib.length_dw++] = 128; /* x */
3700 ib.ptr[ib.length_dw++] = 1; /* y */
3701 ib.ptr[ib.length_dw++] = 1; /* z */
3702 ib.ptr[ib.length_dw++] =
3703 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3705 /* write CS partial flush packet */
3706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3707 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3710 /* write the register state for the compute dispatch */
3711 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3713 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3714 - PACKET3_SET_SH_REG_START;
3715 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3717 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3718 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3719 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3720 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3721 - PACKET3_SET_SH_REG_START;
3722 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3723 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3725 /* write dispatch packet */
3726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3727 ib.ptr[ib.length_dw++] = 128; /* x */
3728 ib.ptr[ib.length_dw++] = 1; /* y */
3729 ib.ptr[ib.length_dw++] = 1; /* z */
3730 ib.ptr[ib.length_dw++] =
3731 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3733 /* write CS partial flush packet */
3734 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3735 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3737 /* schedule the ib on the ring */
3738 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3740 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3744 /* wait for the GPU to finish processing the IB */
3745 r = dma_fence_wait(f, false);
3747 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3751 /* read back registers to clear the counters */
3752 mutex_lock(&adev->grbm_idx_mutex);
3753 for (j = 0; j < 16; j++) {
3754 gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
3755 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3756 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3757 gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
3758 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3759 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3760 gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
3761 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3762 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3763 gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
3764 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3765 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3767 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3768 mutex_unlock(&adev->grbm_idx_mutex);
3771 amdgpu_ib_free(adev, &ib, NULL);
3777 static int gfx_v9_0_early_init(void *handle)
3779 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3781 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3782 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3783 gfx_v9_0_set_ring_funcs(adev);
3784 gfx_v9_0_set_irq_funcs(adev);
3785 gfx_v9_0_set_gds_init(adev);
3786 gfx_v9_0_set_rlc_funcs(adev);
3791 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3792 struct amdgpu_iv_entry *entry);
3794 static int gfx_v9_0_ecc_late_init(void *handle)
3796 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3797 struct ras_common_if **ras_if = &adev->gfx.ras_if;
3798 struct ras_ih_if ih_info = {
3799 .cb = gfx_v9_0_process_ras_data_cb,
3801 struct ras_fs_if fs_info = {
3802 .sysfs_name = "gfx_err_count",
3803 .debugfs_name = "gfx_err_inject",
3805 struct ras_common_if ras_block = {
3806 .block = AMDGPU_RAS_BLOCK__GFX,
3807 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3808 .sub_block_index = 0,
3813 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3814 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3818 /* requires IBs so do in late init after IB pool is initialized */
3819 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3823 /* handle resume path. */
3825 /* re-send the RAS TA enable cmd during resume,
3826 * and be prepared to handle failure.
3828 ih_info.head = **ras_if;
3829 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3832 /* request a gpu reset. will run again. */
3833 amdgpu_ras_request_reset_on_boot(adev,
3834 AMDGPU_RAS_BLOCK__GFX);
3837 /* failed to enable ras, clean everything up. */
3840 /* enabled successfully, continue. */
3844 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3848 **ras_if = ras_block;
3850 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3853 amdgpu_ras_request_reset_on_boot(adev,
3854 AMDGPU_RAS_BLOCK__GFX);
3860 ih_info.head = **ras_if;
3861 fs_info.head = **ras_if;
3863 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3867 r = amdgpu_ras_debugfs_create(adev, &fs_info);
3871 r = amdgpu_ras_sysfs_create(adev, &fs_info);
3875 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3881 amdgpu_ras_sysfs_remove(adev, *ras_if);
3883 amdgpu_ras_debugfs_remove(adev, *ras_if);
3885 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3887 amdgpu_ras_feature_enable(adev, *ras_if, 0);
3894 static int gfx_v9_0_late_init(void *handle)
3896 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3899 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3903 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3907 r = gfx_v9_0_ecc_late_init(handle);
3914 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3916 uint32_t rlc_setting;
3918 /* if RLC is not enabled, do nothing */
3919 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3920 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3926 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3931 data = RLC_SAFE_MODE__CMD_MASK;
3932 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
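/* CMD plus MESSAGE=1 requests safe-mode entry; the RLC acknowledges by
 * clearing the CMD bit, which the loop below polls for */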
3933 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3935 /* wait for RLC_SAFE_MODE */
3936 for (i = 0; i < adev->usec_timeout; i++) {
3937 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3943 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3947 data = RLC_SAFE_MODE__CMD_MASK;
3948 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3951 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3954 amdgpu_gfx_rlc_enter_safe_mode(adev);
3956 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3957 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3958 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3959 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3961 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3962 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3965 amdgpu_gfx_rlc_exit_safe_mode(adev);
3968 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3971 /* TODO: double check if we need to perform under safe mode */
3972 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3974 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3975 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3977 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3979 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3980 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3982 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3984 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3987 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3992 amdgpu_gfx_rlc_enter_safe_mode(adev);
3994 /* It is disabled by HW by default */
3995 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3996 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
3997 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3999 if (adev->asic_type != CHIP_VEGA12)
4000 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4002 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4003 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4004 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4006 /* only for Vega10 & Raven1 */
4007 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4010 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4012 /* MGLS is a global flag to control all MGLS in GFX */
4013 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4014 /* 2 - RLC memory Light sleep */
4015 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4016 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4017 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4019 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4021 /* 3 - CP memory Light sleep */
4022 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4023 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4024 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4026 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4030 /* 1 - MGCG_OVERRIDE */
4031 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4033 if (adev->asic_type != CHIP_VEGA12)
4034 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4036 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4037 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4038 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4039 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4042 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4044 /* 2 - disable MGLS in RLC */
4045 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4046 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4047 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4048 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4051 /* 3 - disable MGLS in CP */
4052 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4053 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4054 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4055 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4059 amdgpu_gfx_rlc_exit_safe_mode(adev);
4062 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4067 amdgpu_gfx_rlc_enter_safe_mode(adev);
4069 /* Enable 3D CGCG/CGLS */
4070 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4071 /* write cmd to clear cgcg/cgls ov */
4072 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4073 /* unset CGCG override */
4074 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4075 /* update CGCG and CGLS override bits */
4077 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4079 /* enable 3Dcgcg FSM(0x0000363f) */
4080 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4082 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4083 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4084 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4085 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4086 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4088 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4090 /* set IDLE_POLL_COUNT(0x00900100) */
4091 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4092 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4093 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4095 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4097 /* Disable CGCG/CGLS */
4098 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4099 /* disable cgcg, cgls should be disabled */
4100 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4101 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4102 /* disable cgcg and cgls in FSM */
4104 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4107 amdgpu_gfx_rlc_exit_safe_mode(adev);
4110 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4115 amdgpu_gfx_rlc_enter_safe_mode(adev);
4117 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4118 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4119 /* unset CGCG override */
4120 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4121 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4122 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4124 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4125 /* update CGCG and CGLS override bits */
4127 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4129 /* enable cgcg FSM(0x0000363F) */
4130 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4132 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4133 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4134 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4135 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4136 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4138 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4140 /* set IDLE_POLL_COUNT(0x00900100) */
4141 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4142 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4143 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4145 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4147 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4148 /* reset CGCG/CGLS bits */
4149 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4150 /* disable cgcg and cgls in FSM */
4152 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4155 amdgpu_gfx_rlc_exit_safe_mode(adev);
4158 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4162 /* CGCG/CGLS should be enabled after MGCG/MGLS
4163 * === MGCG + MGLS ===
4165 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4166 /* === CGCG/CGLS for GFX 3D Only === */
4167 gfx_v9_0_update_3d_clock_gating(adev, enable);
4168 /* === CGCG + CGLS === */
4169 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4171 /* CGCG/CGLS should be disabled before MGCG/MGLS
4172 * === CGCG + CGLS ===
4174 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4175 /* === CGCG/CGLS for GFX 3D Only === */
4176 gfx_v9_0_update_3d_clock_gating(adev, enable);
4177 /* === MGCG + MGLS === */
4178 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4183 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4184 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4185 .set_safe_mode = gfx_v9_0_set_safe_mode,
4186 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4187 .init = gfx_v9_0_rlc_init,
4188 .get_csb_size = gfx_v9_0_get_csb_size,
4189 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4190 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4191 .resume = gfx_v9_0_rlc_resume,
4192 .stop = gfx_v9_0_rlc_stop,
4193 .reset = gfx_v9_0_rlc_reset,
4194 .start = gfx_v9_0_rlc_start
4197 static int gfx_v9_0_set_powergating_state(void *handle,
4198 enum amd_powergating_state state)
4200 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4201 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4203 switch (adev->asic_type) {
4206 amdgpu_gfx_off_ctrl(adev, false);
4207 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
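/* GFXOFF must stay disabled while the powergating registers below are
 * reprogrammed */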
4209 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4210 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4211 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4213 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4214 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4217 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4218 gfx_v9_0_enable_cp_power_gating(adev, true);
4220 gfx_v9_0_enable_cp_power_gating(adev, false);
4222 /* update gfx cgpg state */
4223 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4225 /* update gfx mg power gating state */
4226 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4229 amdgpu_gfx_off_ctrl(adev, true);
4233 amdgpu_gfx_off_ctrl(adev, false);
4234 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4236 amdgpu_gfx_off_ctrl(adev, true);
4246 static int gfx_v9_0_set_clockgating_state(void *handle,
4247 enum amd_clockgating_state state)
4249 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4251 if (amdgpu_sriov_vf(adev))
4254 switch (adev->asic_type) {
4259 gfx_v9_0_update_gfx_clock_gating(adev,
4260 state == AMD_CG_STATE_GATE ? true : false);
4268 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4270 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4273 if (amdgpu_sriov_vf(adev))
4276 /* AMD_CG_SUPPORT_GFX_MGCG */
4277 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4278 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4279 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4281 /* AMD_CG_SUPPORT_GFX_CGCG */
4282 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4283 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4284 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4286 /* AMD_CG_SUPPORT_GFX_CGLS */
4287 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4288 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4290 /* AMD_CG_SUPPORT_GFX_RLC_LS */
4291 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4292 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4293 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4295 /* AMD_CG_SUPPORT_GFX_CP_LS */
4296 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4297 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4298 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4300 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4301 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4302 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4303 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4305 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4306 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4307 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4310 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4312 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4315 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4317 struct amdgpu_device *adev = ring->adev;
4320 /* XXX check if swapping is necessary on BE */
4321 if (ring->use_doorbell) {
4322 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4324 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4325 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4331 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4333 struct amdgpu_device *adev = ring->adev;
4335 if (ring->use_doorbell) {
4336 /* XXX check if swapping is necessary on BE */
4337 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4338 WDOORBELL64(ring->doorbell_index, ring->wptr);
4340 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4341 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4345 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4347 struct amdgpu_device *adev = ring->adev;
4348 u32 ref_and_mask, reg_mem_engine;
4349 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4351 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4354 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4357 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4364 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4365 reg_mem_engine = 1; /* pfp */
4368 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4369 adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4370 adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4371 ref_and_mask, ref_and_mask, 0x20);
4374 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4375 struct amdgpu_job *job,
4376 struct amdgpu_ib *ib,
4379 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4380 u32 header, control = 0;
4382 if (ib->flags & AMDGPU_IB_FLAG_CE)
4383 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4385 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4387 control |= ib->length_dw | (vmid << 24);
4389 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4390 control |= INDIRECT_BUFFER_PRE_ENB(1);
4392 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4393 gfx_v9_0_ring_emit_de_meta(ring);
4396 amdgpu_ring_write(ring, header);
4397 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4398 amdgpu_ring_write(ring,
4402 lower_32_bits(ib->gpu_addr));
4403 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4404 amdgpu_ring_write(ring, control);
4407 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4408 struct amdgpu_job *job,
4409 struct amdgpu_ib *ib,
4412 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4413 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4415 /* Currently, there is a high probability of getting a wave ID mismatch
4416 * between ME and GDS, leading to a hw deadlock, because ME generates
4417 * different wave IDs than the GDS expects. This situation happens
4418 * randomly when at least 5 compute pipes use GDS ordered append.
4419 * The wave IDs generated by ME are also wrong after suspend/resume.
4420 * Those are probably bugs somewhere else in the kernel driver.
4422 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4423 * GDS to 0 for this ring (me/pipe).
4425 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4426 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4427 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4428 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4431 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4432 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4433 amdgpu_ring_write(ring,
4437 lower_32_bits(ib->gpu_addr));
4438 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4439 amdgpu_ring_write(ring, control);
4442 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4443 u64 seq, unsigned flags)
4445 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4446 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4447 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4449 /* RELEASE_MEM - flush caches, send int */
4450 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4451 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4452 EOP_TC_NC_ACTION_EN) :
4453 (EOP_TCL1_ACTION_EN |
4455 EOP_TC_WB_ACTION_EN |
4456 EOP_TC_MD_ACTION_EN)) |
4457 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4459 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
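/* DATA_SEL: 2 = write the 64-bit fence value, 1 = write only the low 32 bits;
 * INT_SEL: 2 = raise an interrupt once the write has been confirmed */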
4462 * the address should be Qword aligned for a 64-bit write, and Dword
4463 * aligned when only the low 32 bits of data are written (high bits discarded)
4469 amdgpu_ring_write(ring, lower_32_bits(addr));
4470 amdgpu_ring_write(ring, upper_32_bits(addr));
4471 amdgpu_ring_write(ring, lower_32_bits(seq));
4472 amdgpu_ring_write(ring, upper_32_bits(seq));
4473 amdgpu_ring_write(ring, 0);
4476 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4478 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4479 uint32_t seq = ring->fence_drv.sync_seq;
4480 uint64_t addr = ring->fence_drv.gpu_addr;
4482 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4483 lower_32_bits(addr), upper_32_bits(addr),
4484 seq, 0xffffffff, 4);
4487 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4488 unsigned vmid, uint64_t pd_addr)
4490 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4492 /* compute doesn't have PFP */
4493 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4494 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4495 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4496 amdgpu_ring_write(ring, 0x0);
4500 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4502 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4505 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4509 /* XXX check if swapping is necessary on BE */
4510 if (ring->use_doorbell)
4511 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4517 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4520 struct amdgpu_device *adev = ring->adev;
4521 int pipe_num, tmp, reg;
4522 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
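/* pipes holding a reservation get the full SPI_WCL_PIPE_PERCENT value; all
 * others are throttled down to the minimum (0x1) */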
4524 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4526 /* first me only has 2 entries, GFX and HP3D */
4530 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4532 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4536 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4537 struct amdgpu_ring *ring,
4542 struct amdgpu_ring *iring;
4544 mutex_lock(&adev->gfx.pipe_reserve_mutex);
4545 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
4547 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4549 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4551 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4552 /* Clear all reservations - everyone reacquires all resources */
4553 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4554 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4557 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4558 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4561 /* Lower all pipes without a current reservation */
4562 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4563 iring = &adev->gfx.gfx_ring[i];
4564 pipe = amdgpu_gfx_queue_to_bit(adev,
4568 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4569 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4572 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4573 iring = &adev->gfx.compute_ring[i];
4574 pipe = amdgpu_gfx_queue_to_bit(adev,
4578 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4579 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4583 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4586 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4587 struct amdgpu_ring *ring,
4590 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4591 uint32_t queue_priority = acquire ? 0xf : 0x0;
4593 mutex_lock(&adev->srbm_mutex);
4594 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4596 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4597 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4599 soc15_grbm_select(adev, 0, 0, 0, 0);
4600 mutex_unlock(&adev->srbm_mutex);
4603 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4604 enum drm_sched_priority priority)
4606 struct amdgpu_device *adev = ring->adev;
4607 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4609 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4612 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4613 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4616 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4618 struct amdgpu_device *adev = ring->adev;
4620 /* XXX check if swapping is necessary on BE */
4621 if (ring->use_doorbell) {
4622 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4623 WDOORBELL64(ring->doorbell_index, ring->wptr);
4625 BUG(); /* only DOORBELL method supported on gfx9 now */
4629 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4630 u64 seq, unsigned int flags)
4632 struct amdgpu_device *adev = ring->adev;
4634 /* we only allocate 32bit for each seq wb address */
4635 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4637 /* write fence seq to the "addr" */
4638 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4639 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4640 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4641 amdgpu_ring_write(ring, lower_32_bits(addr));
4642 amdgpu_ring_write(ring, upper_32_bits(addr));
4643 amdgpu_ring_write(ring, lower_32_bits(seq));
4645 if (flags & AMDGPU_FENCE_FLAG_INT) {
4646 /* set register to trigger INT */
4647 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4648 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4649 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4650 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4651 amdgpu_ring_write(ring, 0);
4652 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4656 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4658 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4659 amdgpu_ring_write(ring, 0);
4662 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4664 struct v9_ce_ib_state ce_payload = {0};
4668 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4669 csa_addr = amdgpu_csa_vaddr(ring->adev);
4671 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4672 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4673 WRITE_DATA_DST_SEL(8) |
4675 WRITE_DATA_CACHE_POLICY(0));
4676 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4677 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4678 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4681 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4683 struct v9_de_ib_state de_payload = {0};
4684 uint64_t csa_addr, gds_addr;
4687 csa_addr = amdgpu_csa_vaddr(ring->adev);
4688 gds_addr = csa_addr + 4096;
4689 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4690 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4692 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4693 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4694 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4695 WRITE_DATA_DST_SEL(8) |
4697 WRITE_DATA_CACHE_POLICY(0));
4698 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4699 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4700 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4703 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4705 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4706 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0 = frame_start, 1 = frame_end */
4709 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4713 if (amdgpu_sriov_vf(ring->adev))
4714 gfx_v9_0_ring_emit_ce_meta(ring);
4716 gfx_v9_0_ring_emit_tmz(ring, true);
4718 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4719 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4720 /* set load_global_config & load_global_uconfig */
4722 /* set load_cs_sh_regs */
4724 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4727 /* set load_ce_ram if preamble presented */
4728 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4731 /* still load_ce_ram if this is the first time the preamble is presented,
4732 * even though no context switch happens.
4734 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4738 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4739 amdgpu_ring_write(ring, dw2);
4740 amdgpu_ring_write(ring, 0);
4743 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4746 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4747 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4748 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4749 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4750 ret = ring->wptr & ring->buf_mask;
4751 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4755 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4758 BUG_ON(offset > ring->buf_mask);
4759 BUG_ON(ring->ring[offset] != 0x55aa55aa);
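/* replace the 0x55aa55aa placeholder written by init_cond_exec with the
 * number of dwords to skip, accounting for ring-buffer wrap-around */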
4761 cur = (ring->wptr & ring->buf_mask) - 1;
4762 if (likely(cur > offset))
4763 ring->ring[offset] = cur - offset;
4765 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4768 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4770 struct amdgpu_device *adev = ring->adev;
4772 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4773 amdgpu_ring_write(ring, 0 | /* src: register*/
4774 (5 << 8) | /* dst: memory */
4775 (1 << 20)); /* write confirm */
4776 amdgpu_ring_write(ring, reg);
4777 amdgpu_ring_write(ring, 0);
4778 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4779 adev->virt.reg_val_offs * 4));
4780 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4781 adev->virt.reg_val_offs * 4));
4784 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4789 switch (ring->funcs->type) {
4790 case AMDGPU_RING_TYPE_GFX:
4791 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4793 case AMDGPU_RING_TYPE_KIQ:
4794 cmd = (1 << 16); /* no inc addr */
4800 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4801 amdgpu_ring_write(ring, cmd);
4802 amdgpu_ring_write(ring, reg);
4803 amdgpu_ring_write(ring, 0);
4804 amdgpu_ring_write(ring, val);
4807 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4808 uint32_t val, uint32_t mask)
4810 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4813 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4814 uint32_t reg0, uint32_t reg1,
4815 uint32_t ref, uint32_t mask)
4817 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4818 struct amdgpu_device *adev = ring->adev;
4819 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4820 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
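/* CP firmware that supports write+wait can do both in a single WAIT_REG_MEM
 * packet; older firmware needs a separate register write followed by a wait */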
4823 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4826 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4830 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4832 struct amdgpu_device *adev = ring->adev;
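/* ask the SQ to kill every wave belonging to the given VMID (CMD 0x03 is the
 * kill command) as part of soft ring recovery */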
4835 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4836 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4837 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4838 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4839 WREG32(mmSQ_CMD, value);
4842 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4843 enum amdgpu_interrupt_state state)
4846 case AMDGPU_IRQ_STATE_DISABLE:
4847 case AMDGPU_IRQ_STATE_ENABLE:
4848 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4849 TIME_STAMP_INT_ENABLE,
4850 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4857 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4859 enum amdgpu_interrupt_state state)
4861 u32 mec_int_cntl, mec_int_cntl_reg;
4864 * amdgpu controls only the first MEC. That's why this function only
4865 * handles the setting of interrupts for this specific MEC. All other
4866 * pipes' interrupts are set by amdkfd.
4872 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4875 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4878 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4881 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4884 DRM_DEBUG("invalid pipe %d\n", pipe);
4888 DRM_DEBUG("invalid me %d\n", me);
4893 case AMDGPU_IRQ_STATE_DISABLE:
4894 mec_int_cntl = RREG32(mec_int_cntl_reg);
4895 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4896 TIME_STAMP_INT_ENABLE, 0);
4897 WREG32(mec_int_cntl_reg, mec_int_cntl);
4899 case AMDGPU_IRQ_STATE_ENABLE:
4900 mec_int_cntl = RREG32(mec_int_cntl_reg);
4901 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4902 TIME_STAMP_INT_ENABLE, 1);
4903 WREG32(mec_int_cntl_reg, mec_int_cntl);
4910 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4911 struct amdgpu_irq_src *source,
4913 enum amdgpu_interrupt_state state)
4916 case AMDGPU_IRQ_STATE_DISABLE:
4917 case AMDGPU_IRQ_STATE_ENABLE:
4918 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4919 PRIV_REG_INT_ENABLE,
4920 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4929 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4930 struct amdgpu_irq_src *source,
4932 enum amdgpu_interrupt_state state)
4935 case AMDGPU_IRQ_STATE_DISABLE:
4936 case AMDGPU_IRQ_STATE_ENABLE:
4937 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4938 PRIV_INSTR_INT_ENABLE,
4939 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4947 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
4948 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4949 CP_ECC_ERROR_INT_ENABLE, 1)
4951 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
4952 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4953 CP_ECC_ERROR_INT_ENABLE, 0)
4955 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4956 struct amdgpu_irq_src *source,
4958 enum amdgpu_interrupt_state state)
4961 case AMDGPU_IRQ_STATE_DISABLE:
4962 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4963 CP_ECC_ERROR_INT_ENABLE, 0);
4964 DISABLE_ECC_ON_ME_PIPE(1, 0);
4965 DISABLE_ECC_ON_ME_PIPE(1, 1);
4966 DISABLE_ECC_ON_ME_PIPE(1, 2);
4967 DISABLE_ECC_ON_ME_PIPE(1, 3);
4970 case AMDGPU_IRQ_STATE_ENABLE:
4971 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4972 CP_ECC_ERROR_INT_ENABLE, 1);
4973 ENABLE_ECC_ON_ME_PIPE(1, 0);
4974 ENABLE_ECC_ON_ME_PIPE(1, 1);
4975 ENABLE_ECC_ON_ME_PIPE(1, 2);
4976 ENABLE_ECC_ON_ME_PIPE(1, 3);
4986 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4987 struct amdgpu_irq_src *src,
4989 enum amdgpu_interrupt_state state)
4992 case AMDGPU_CP_IRQ_GFX_EOP:
4993 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4995 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4996 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4998 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4999 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5001 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5002 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5004 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5005 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5007 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5008 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5010 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5011 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5013 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5014 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5016 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5017 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5025 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5026 struct amdgpu_irq_src *source,
5027 struct amdgpu_iv_entry *entry)
5030 u8 me_id, pipe_id, queue_id;
5031 struct amdgpu_ring *ring;
5033 DRM_DEBUG("IH: CP EOP\n");
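/* entry->ring_id packs the source: bits [1:0] pipe, bits [3:2] ME, bits [6:4] queue */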
5034 me_id = (entry->ring_id & 0x0c) >> 2;
5035 pipe_id = (entry->ring_id & 0x03) >> 0;
5036 queue_id = (entry->ring_id & 0x70) >> 4;
5040 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5044 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5045 ring = &adev->gfx.compute_ring[i];
5046 /* Per-queue interrupt is supported for MEC starting from VI.
5047 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5049 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5050 amdgpu_fence_process(ring);
5057 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5058 struct amdgpu_iv_entry *entry)
5060 u8 me_id, pipe_id, queue_id;
5061 struct amdgpu_ring *ring;
5064 me_id = (entry->ring_id & 0x0c) >> 2;
5065 pipe_id = (entry->ring_id & 0x03) >> 0;
5066 queue_id = (entry->ring_id & 0x70) >> 4;
5070 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5074 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5075 ring = &adev->gfx.compute_ring[i];
5076 if (ring->me == me_id && ring->pipe == pipe_id &&
5077 ring->queue == queue_id)
5078 drm_sched_fault(&ring->sched);
5084 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5085 struct amdgpu_irq_src *source,
5086 struct amdgpu_iv_entry *entry)
5088 DRM_ERROR("Illegal register access in command stream\n");
5089 gfx_v9_0_fault(adev, entry);
5093 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5094 struct amdgpu_irq_src *source,
5095 struct amdgpu_iv_entry *entry)
5097 DRM_ERROR("Illegal instruction in command stream\n");
5098 gfx_v9_0_fault(adev, entry);
5102 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5103 struct amdgpu_iv_entry *entry)
5105 /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5106 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5107 amdgpu_ras_reset_gpu(adev, 0);
5108 return AMDGPU_RAS_UE;
5111 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5112 struct amdgpu_irq_src *source,
5113 struct amdgpu_iv_entry *entry)
5115 struct ras_common_if *ras_if = adev->gfx.ras_if;
5116 struct ras_dispatch_if ih_data = {
5123 ih_data.head = *ras_if;
5125 DRM_ERROR("CP ECC ERROR IRQ\n");
5126 amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5130 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5132 .early_init = gfx_v9_0_early_init,
5133 .late_init = gfx_v9_0_late_init,
5134 .sw_init = gfx_v9_0_sw_init,
5135 .sw_fini = gfx_v9_0_sw_fini,
5136 .hw_init = gfx_v9_0_hw_init,
5137 .hw_fini = gfx_v9_0_hw_fini,
5138 .suspend = gfx_v9_0_suspend,
5139 .resume = gfx_v9_0_resume,
5140 .is_idle = gfx_v9_0_is_idle,
5141 .wait_for_idle = gfx_v9_0_wait_for_idle,
5142 .soft_reset = gfx_v9_0_soft_reset,
5143 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5144 .set_powergating_state = gfx_v9_0_set_powergating_state,
5145 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5148 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5149 .type = AMDGPU_RING_TYPE_GFX,
5151 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5152 .support_64bit_ptrs = true,
5153 .vmhub = AMDGPU_GFXHUB,
5154 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5155 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5156 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5157 .emit_frame_size = /* 242 dwords maximum if 16 IBs */
5159 7 + /* PIPELINE_SYNC */
5160 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5161 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5163 8 + /* FENCE for VM_FLUSH */
5164 20 + /* GDS switch */
5165 4 + /* double SWITCH_BUFFER,
5166 the first COND_EXEC jumps to the place just
5167 prior to this double SWITCH_BUFFER */
5175 8 + 8 + /* FENCE x2 */
5176 2, /* SWITCH_BUFFER */
5177 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5178 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5179 .emit_fence = gfx_v9_0_ring_emit_fence,
5180 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5181 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5182 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5183 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5184 .test_ring = gfx_v9_0_ring_test_ring,
5185 .test_ib = gfx_v9_0_ring_test_ib,
5186 .insert_nop = amdgpu_ring_insert_nop,
5187 .pad_ib = amdgpu_ring_generic_pad_ib,
5188 .emit_switch_buffer = gfx_v9_ring_emit_sb,
5189 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5190 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5191 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5192 .emit_tmz = gfx_v9_0_ring_emit_tmz,
5193 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5194 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5195 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5196 .soft_recovery = gfx_v9_0_ring_soft_recovery,
5199 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5200 .type = AMDGPU_RING_TYPE_COMPUTE,
5202 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5203 .support_64bit_ptrs = true,
5204 .vmhub = AMDGPU_GFXHUB,
5205 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5206 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5207 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5209 20 + /* gfx_v9_0_ring_emit_gds_switch */
5210 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5211 5 + /* hdp invalidate */
5212 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5213 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5214 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5215 2 + /* gfx_v9_0_ring_emit_vm_flush */
5216 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5217 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5218 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5219 .emit_fence = gfx_v9_0_ring_emit_fence,
5220 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5221 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5222 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5223 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5224 .test_ring = gfx_v9_0_ring_test_ring,
5225 .test_ib = gfx_v9_0_ring_test_ib,
5226 .insert_nop = amdgpu_ring_insert_nop,
5227 .pad_ib = amdgpu_ring_generic_pad_ib,
5228 .set_priority = gfx_v9_0_ring_set_priority_compute,
5229 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5230 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5231 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5234 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5235 .type = AMDGPU_RING_TYPE_KIQ,
5237 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5238 .support_64bit_ptrs = true,
5239 .vmhub = AMDGPU_GFXHUB,
5240 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5241 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5242 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5244 20 + /* gfx_v9_0_ring_emit_gds_switch */
5245 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5246 5 + /* hdp invalidate */
5247 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5248 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5249 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5250 2 + /* gfx_v9_0_ring_emit_vm_flush */
5251 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5252 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5253 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5254 .test_ring = gfx_v9_0_ring_test_ring,
5255 .insert_nop = amdgpu_ring_insert_nop,
5256 .pad_ib = amdgpu_ring_generic_pad_ib,
5257 .emit_rreg = gfx_v9_0_ring_emit_rreg,
5258 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5259 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5260 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5263 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5267 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5269 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5270 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5272 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5273 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5276 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5277 .set = gfx_v9_0_set_eop_interrupt_state,
5278 .process = gfx_v9_0_eop_irq,
5281 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5282 .set = gfx_v9_0_set_priv_reg_fault_state,
5283 .process = gfx_v9_0_priv_reg_irq,
5286 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5287 .set = gfx_v9_0_set_priv_inst_fault_state,
5288 .process = gfx_v9_0_priv_inst_irq,
5291 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5292 .set = gfx_v9_0_set_cp_ecc_error_state,
5293 .process = gfx_v9_0_cp_ecc_error_irq,
5297 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5299 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5300 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5302 adev->gfx.priv_reg_irq.num_types = 1;
5303 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5305 adev->gfx.priv_inst_irq.num_types = 1;
5306 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5308 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5309 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5312 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5314 switch (adev->asic_type) {
5319 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5326 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5328 /* init asic gds info */
5329 switch (adev->asic_type) {
5333 adev->gds.gds_size = 0x10000;
5336 adev->gds.gds_size = 0x1000;
5339 adev->gds.gds_size = 0x10000;
5343 switch (adev->asic_type) {
5346 adev->gds.gds_compute_max_wave_id = 0x7ff;
5349 adev->gds.gds_compute_max_wave_id = 0x27f;
5352 if (adev->rev_id >= 0x8)
5353 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5355 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5358 /* this really depends on the chip */
5359 adev->gds.gds_compute_max_wave_id = 0x7ff;
5363 adev->gds.gws_size = 64;
5364 adev->gds.oa_size = 16;
5367 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5375 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5376 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5378 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5381 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5385 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5386 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
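/* both registers report inactive CUs; mask, shift and invert to obtain the
 * active-CU bitmap for the currently selected SE/SH */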
5388 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5389 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5391 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5393 return (~data) & mask;
5396 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5397 struct amdgpu_cu_info *cu_info)
5399 int i, j, k, counter, active_cu_number = 0;
5400 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5401 unsigned disable_masks[4 * 2];
5403 if (!adev || !cu_info)
5406 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5408 mutex_lock(&adev->grbm_idx_mutex);
5409 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5410 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5414 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5416 gfx_v9_0_set_user_cu_inactive_bitmap(
5417 adev, disable_masks[i * 2 + j]);
5418 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5419 cu_info->bitmap[i][j] = bitmap;
5421 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5422 if (bitmap & mask) {
5423 if (counter < adev->gfx.config.max_cu_per_sh)
5429 active_cu_number += counter;
5431 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5432 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5435 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5436 mutex_unlock(&adev->grbm_idx_mutex);
5438 cu_info->number = active_cu_number;
5439 cu_info->ao_cu_mask = ao_cu_mask;
5440 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5445 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5447 .type = AMD_IP_BLOCK_TYPE_GFX,
5451 .funcs = &gfx_v9_0_ip_funcs,