/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)	((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)	((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)	((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)	((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)	((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)	((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)	((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

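/* per-VMID GDS base/size and GWS/OA offset registers, indexed by VMID */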
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

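/*
 * Golden register lists.  Each entry is a register offset followed by an
 * AND mask selecting the bits to clear and an OR value to set; the triples
 * are consumed by amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers() below.
 */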
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

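/* human-readable decode of the SQ_EDC_INFO SOURCE field */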
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

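/*
 * Program the "golden" register settings for the detected ASIC: the
 * MGCG/CGCG clock-gating defaults, per-revision tuning values and the
 * common configuration.  Polaris10 additionally gets an I2C-based VBIOS
 * workaround on a few known board revisions.
 */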
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

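/*
 * Basic ring liveness test: seed a scratch register with 0xCAFEDEAD,
 * push a SET_UCONFIG_REG packet that writes 0xDEADBEEF to the same
 * register, and poll until the value flips or the timeout expires.
 */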
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

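/*
 * Indirect-buffer test: submit a small IB whose WRITE_DATA packet stores
 * 0xDEADBEEF into a writeback slot, then wait on the fence to confirm
 * that the CP actually executed it.
 */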
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

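/*
 * Fetch and validate the PFP, ME, CE, RLC, MEC and (where present) MEC2
 * firmware images.  On Polaris parts the "_2" variants are tried first,
 * with a fallback to the original names.  When the SMU loads the firmware,
 * the images are also registered in adev->firmware.ucode[].
 */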
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs is
	 * formally released on feature version #46
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

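/*
 * Build the clear-state indirect buffer from the vi_cs_data tables: a
 * preamble and CONTEXT_CONTROL header, every SECT_CONTEXT register extent,
 * the raster configuration, and a trailing CLEAR_STATE packet.
 */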
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

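/*
 * Copy the jump tables embedded in the CE/PFP/ME/MEC (and, on Carrizo,
 * MEC2) firmware images back to back into the RLC cp_table buffer.
 */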
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

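/*
 * Allocate and fill the RLC objects: the clear-state buffer in VRAM and,
 * on Carrizo/Stoney, the CP jump-table buffer used when the CP is power
 * gated.
 */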
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

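/*
 * Allocate one GFX8_MEC_HPD_SIZE slot of HPD EOP storage in GTT for each
 * acquired compute ring and zero it.
 */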
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

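/*
 * Hand-assembled GCN3 compute shaders for the EDC GPR workaround below:
 * they fill the VGPR and SGPR register files with known values and finish
 * with s_barrier/s_endpgm (0xbf8a0000/0xbf810000).
 */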
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
};
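/*
 * gfx_v8_0_do_edc_gpr_workarounds - prime the GPR banks for EDC
 *
 * Descriptive note (added): Carrizo only. Dispatches the init shaders
 * above so every VGPR/SGPR is written once, initializing their ECC
 * state, then enables DED/FED propagation in GB_EDC_MODE and reads
 * the SEC/DED counter registers back to clear them.
 */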
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);
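	/*
	 * IB sizing note (added): each {reg, value} pair costs 3 dwords
	 * (SET_SH_REG header + offset + value), plus 4 dwords for the
	 * PGM_LO/HI write, 5 for DISPATCH_DIRECT and 2 for the
	 * EVENT_WRITE flush, times 4 bytes per dword, per dispatch.
	 */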
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);
	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;
	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
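/*
 * gfx_v8_0_gpu_early_init - derive the per-ASIC gfx configuration
 *
 * Descriptive note (added): fills adev->gfx.config (shader engines,
 * pipes, CUs, FIFO sizes) from per-chip tables or the atombios gfx
 * info, then selects the golden GB_ADDR_CONFIG value and fixes up
 * its ROW_SIZE field from the detected memory row size.
 */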
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}
	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
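/*
 * gfx_v8_0_compute_ring_init - set up one MEC compute ring
 *
 * Descriptive note (added): assigns the ring its MEC/pipe/queue
 * identity, a doorbell slot and a GFX8_MEC_HPD_SIZE slice of the
 * shared EOP buffer, then maps it onto the matching MEC pipe EOP
 * interrupt source.
 */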
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			+ (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
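/*
 * gfx_v8_0_sw_init - software-side setup for the GFX8 block
 *
 * Descriptive note (added): registers the CP/SQ interrupt sources,
 * loads microcode, allocates the RLC, MEC and KIQ objects, then
 * creates the gfx ring and all enabled compute rings (spread
 * horizontally across pipes) before running the early hw config.
 */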
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);
	if (r)
		return r;
	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}
	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
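/*
 * gfx_v8_0_tiling_mode_table_init - program the tiling mode tables
 *
 * Descriptive note (added): builds the per-ASIC GB_TILE_MODE and
 * GB_MACROTILE_MODE tables from the ARRAY_MODE/PIPE_CONFIG/... field
 * macros and writes them to the hardware; reserved indices (e.g.
 * macrotile index 7) are skipped when writing.
 */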
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);
		/* fall through */
	case CHIP_CARRIZO:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}
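
/*
 * Program GRBM_GFX_INDEX so that subsequent graphics register accesses are
 * steered to one shader engine (SE), shader array (SH) and instance, or are
 * broadcast to all of them when 0xffffffff is passed.  Callers in this file
 * take adev->grbm_idx_mutex around the select/access/deselect sequence.
 */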
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}
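
/* Route per-queue register access to the given ME/pipe/queue via SRBM. */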
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
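
/*
 * Return a bitmap of the render backends (RBs) that are active, i.e. not
 * disabled by fuses or by the user, for the currently selected SE/SH.
 */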
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
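
/*
 * Look up the default PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values for
 * the current ASIC.  These describe how RBs map to packers and shader
 * engines on a fully enabled (non-harvested) part.
 */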
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= RB_MAP_PKR0(1);
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
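
/*
 * On parts with harvested (disabled) RBs the default raster config would
 * reference missing backends, so rebuild the SE/PKR/RB mapping fields per
 * shader engine from the active-RB mask and write the result through
 * GRBM_GFX_INDEX steering.
 */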
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
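
/*
 * Discover the active render backends across all SE/SH pairs, program the
 * raster configuration (taking harvesting into account) and cache the
 * per-SE register values so they can later be reported to userspace.
 */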
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
/**
 * gfx_v8_0_init_compute_vmid - initialize the memory apertures for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the compute VMID SH_MEM registers.
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}
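
/*
 * One-time programming of the GFX constants: address config, tiling tables,
 * RB setup, SH_MEM apertures for every VMID, and the PA_SC FIFO sizes.
 */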
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}
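
/*
 * Poll the RLC serdes busy registers (per-CU masters first, then the non-CU
 * masters) until they go idle or adev->usec_timeout expires.
 */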
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
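
/*
 * Walk the RLC register-list-format blob, recording the start offset of each
 * indirect entry and building a table of unique indices; the blob entries
 * are rewritten in place to refer to their slot in unique_indices.
 */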
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
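
/*
 * Upload the RLC save/restore lists: the direct register restore list goes
 * to ARAM, the indirect (format) list and its metadata to GPM scratch, and
 * the unique indirect indices to the SRM index control registers.
 */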
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	u32 i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* double check list size */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
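
/* Program the RLC power-gating delays and idle thresholds. */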
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	u32 data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12) ||
		   (adev->asic_type == CHIP_VEGAM)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on APUs such as Carrizo, the CP interrupt is enabled after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
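
/*
 * Halt or un-halt the three gfx CP micro engines (ME, PFP, CE); when
 * halting, also mark the gfx rings as not ready.
 */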
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
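
/*
 * Emit the clear-state preamble on the gfx ring: context control, the
 * SECT_CONTEXT extents from vi_cs_data, the raster config, and the CE
 * partition setup.  The ring allocation below adds 4 dwords on top of
 * gfx_v8_0_get_csb_size() to cover the trailing SET_BASE packet.
 */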
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
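
/*
 * Bring up the gfx ring: program the CP_RB0 size and pointers, the
 * rptr/wptr writeback addresses, the ring base and doorbell, then start
 * the ring and run a ring test.
 */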
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}

/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which queue is the KIQ */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
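
/*
 * Use the KIQ to activate the compute queues: a SET_RESOURCES packet hands
 * the queue mask to the HW scheduler, followed by one MAP_QUEUES packet per
 * compute ring.
 */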
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("KCQ enable failed\n");
		kiq_ring->ready = false;
	}
	return r;
}
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
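
/*
 * Fill in a compute ring's memory queue descriptor (MQD): EOP buffer,
 * queue base and size, read/write pointer writeback addresses and doorbell
 * state, mirroring what will be committed to the CP_HQD_* registers.
 */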
4650 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4652 struct amdgpu_device *adev = ring->adev;
4653 struct vi_mqd *mqd = ring->mqd_ptr;
4654 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4657 mqd->header = 0xC0310800;
4658 mqd->compute_pipelinestat_enable = 0x00000001;
4659 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4660 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4661 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4662 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4663 mqd->compute_misc_reserved = 0x00000003;
4664 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4665 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4666 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4667 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4668 eop_base_addr = ring->eop_gpu_addr >> 8;
4669 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4670 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4672 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4673 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4674 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4675 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4677 mqd->cp_hqd_eop_control = tmp;
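/* Worked example of the encoding: the 2048-byte HPD buffer holds
 * 512 DWs, so order_base_2(512) - 1 = 8 is programmed and the hardware
 * decodes it as 2^(8+1) = 512 DWs, exactly the buffer size.
 */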
4679 /* enable doorbell? */
4680 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4681 CP_HQD_PQ_DOORBELL_CONTROL,
4683 ring->use_doorbell ? 1 : 0);
4685 mqd->cp_hqd_pq_doorbell_control = tmp;
4687 /* set the pointer to the MQD */
4688 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4689 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4691 /* set MQD vmid to 0 */
4692 tmp = RREG32(mmCP_MQD_CONTROL);
4693 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4694 mqd->cp_mqd_control = tmp;
4696 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4697 hqd_gpu_addr = ring->gpu_addr >> 8;
4698 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4699 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4701 /* set up the HQD, this is similar to CP_RB0_CNTL */
4702 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4703 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4704 (order_base_2(ring->ring_size / 4) - 1));
4705 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4706 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4708 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4710 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4711 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4712 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4713 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4714 mqd->cp_hqd_pq_control = tmp;
4716 /* set the wb address whether it's enabled or not */
4717 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4718 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4719 mqd->cp_hqd_pq_rptr_report_addr_hi =
4720 upper_32_bits(wb_gpu_addr) & 0xffff;
4722 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4723 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4724 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4725 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4728 /* enable the doorbell if requested */
4729 if (ring->use_doorbell) {
4730 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4731 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4732 DOORBELL_OFFSET, ring->doorbell_index);
4734 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4736 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4737 DOORBELL_SOURCE, 0);
4738 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4742 mqd->cp_hqd_pq_doorbell_control = tmp;
4744 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4746 mqd->cp_hqd_pq_wptr = ring->wptr;
4747 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4749 /* set the vmid for the queue */
4750 mqd->cp_hqd_vmid = 0;
4752 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4753 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4754 mqd->cp_hqd_persistent_state = tmp;
4757 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4758 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4759 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4760 mqd->cp_hqd_ib_control = tmp;
4762 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4763 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4764 mqd->cp_hqd_iq_timer = tmp;
4766 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4767 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4768 mqd->cp_hqd_ctx_save_control = tmp;
4771 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4772 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4773 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4774 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4775 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4776 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4777 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4778 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4779 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4780 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4781 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4782 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4783 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4784 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4785 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4787 /* activate the queue */
4788 mqd->cp_hqd_active = 1;
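/* Everything above only fills the in-memory MQD image; nothing touches
 * the hardware until gfx_v8_0_mqd_commit() replays it (KIQ) or the KIQ
 * maps the queue from this MQD via MAP_QUEUES (KCQs).
 */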
4793 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4799 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4800 mqd_data = &mqd->cp_mqd_base_addr_lo;
4802 /* disable wptr polling */
4803 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4805 /* program all HQD registers */
4806 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4807 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4809 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4810 * This is safe since EOP RPTR==WPTR for any inactive HQD
4811 * on ASICs that do not support context-save.
4812 * EOP writes/reads can start anywhere in the ring.
4813 */
4814 if (adev->asic_type != CHIP_TONGA) {
4815 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4816 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4817 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4820 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4821 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4823 /* activate the HQD */
4824 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4825 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
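/* The programming order matters: wptr polling is disabled first, the
 * bulk of the HQD state (VMID through EOP control, then the EOP
 * event/error block) is loaded next, and the mmCP_MQD_BASE_ADDR..
 * mmCP_HQD_ACTIVE range is written last, so the ACTIVE bit only goes
 * live once every other register already holds its final value.
 */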
4830 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4832 struct amdgpu_device *adev = ring->adev;
4833 struct vi_mqd *mqd = ring->mqd_ptr;
4834 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4836 gfx_v8_0_kiq_setting(ring);
4838 if (adev->in_gpu_reset) { /* for GPU_RESET case */
4839 /* reset MQD to a clean status */
4840 if (adev->gfx.mec.mqd_backup[mqd_idx])
4841 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4843 /* reset ring buffer */
4844 ring->wptr = 0;
4845 amdgpu_ring_clear_ring(ring);
4846 mutex_lock(&adev->srbm_mutex);
4847 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4848 gfx_v8_0_mqd_commit(adev, mqd);
4849 vi_srbm_select(adev, 0, 0, 0, 0);
4850 mutex_unlock(&adev->srbm_mutex);
4852 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4853 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4854 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4855 mutex_lock(&adev->srbm_mutex);
4856 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4857 gfx_v8_0_mqd_init(ring);
4858 gfx_v8_0_mqd_commit(adev, mqd);
4859 vi_srbm_select(adev, 0, 0, 0, 0);
4860 mutex_unlock(&adev->srbm_mutex);
4862 if (adev->gfx.mec.mqd_backup[mqd_idx])
4863 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
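/* Two paths: after a GPU reset the backed-up MQD image is restored and
 * re-committed as-is, while on first init a fresh MQD is built with all
 * CUs/RBs enabled in the dynamic masks, committed, and then backed up
 * so later resets can restore it without re-deriving the state.
 */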
4869 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4871 struct amdgpu_device *adev = ring->adev;
4872 struct vi_mqd *mqd = ring->mqd_ptr;
4873 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4875 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4876 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4877 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4878 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4879 mutex_lock(&adev->srbm_mutex);
4880 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4881 gfx_v8_0_mqd_init(ring);
4882 vi_srbm_select(adev, 0, 0, 0, 0);
4883 mutex_unlock(&adev->srbm_mutex);
4885 if (adev->gfx.mec.mqd_backup[mqd_idx])
4886 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4887 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4888 /* reset MQD to a clean status */
4889 if (adev->gfx.mec.mqd_backup[mqd_idx])
4890 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4891 /* reset ring buffer */
4892 ring->wptr = 0;
4893 amdgpu_ring_clear_ring(ring);
4894 } else {
4895 amdgpu_ring_clear_ring(ring);
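/* Unlike the KIQ above, a KCQ's MQD is only initialized here and never
 * committed through the SRBM path; the queue reaches the hardware when
 * the KIQ maps it with the MAP_QUEUES packets in
 * gfx_v8_0_kiq_kcq_enable().
 */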
4900 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4902 if (adev->asic_type > CHIP_TONGA) {
4903 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4904 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4906 /* enable doorbells */
4907 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4910 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4912 struct amdgpu_ring *ring;
4915 ring = &adev->gfx.kiq.ring;
4917 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4918 if (unlikely(r != 0))
4921 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4922 if (unlikely(r != 0))
4925 gfx_v8_0_kiq_init_queue(ring);
4926 amdgpu_bo_kunmap(ring->mqd_obj);
4927 ring->mqd_ptr = NULL;
4928 amdgpu_bo_unreserve(ring->mqd_obj);
4933 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4935 struct amdgpu_ring *ring = NULL;
4938 gfx_v8_0_cp_compute_enable(adev, true);
4940 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4941 ring = &adev->gfx.compute_ring[i];
4943 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4944 if (unlikely(r != 0))
4946 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4948 r = gfx_v8_0_kcq_init_queue(ring);
4949 amdgpu_bo_kunmap(ring->mqd_obj);
4950 ring->mqd_ptr = NULL;
4952 amdgpu_bo_unreserve(ring->mqd_obj);
4957 gfx_v8_0_set_mec_doorbell_range(adev);
4959 r = gfx_v8_0_kiq_kcq_enable(adev);
4964 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4965 ring = &adev->gfx.compute_ring[i];
4967 r = amdgpu_ring_test_ring(ring);
4969 ring->ready = false;
4976 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4980 if (!(adev->flags & AMD_IS_APU))
4981 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4983 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4984 /* legacy firmware loading */
4985 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4989 r = gfx_v8_0_cp_compute_load_microcode(adev);
4994 r = gfx_v8_0_kiq_resume(adev);
4998 r = gfx_v8_0_cp_gfx_resume(adev);
5002 r = gfx_v8_0_kcq_resume(adev);
5005 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5010 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5012 gfx_v8_0_cp_gfx_enable(adev, enable);
5013 gfx_v8_0_cp_compute_enable(adev, enable);
5016 static int gfx_v8_0_hw_init(void *handle)
5019 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5021 gfx_v8_0_init_golden_registers(adev);
5022 gfx_v8_0_constants_init(adev);
5024 r = gfx_v8_0_rlc_resume(adev);
5028 r = gfx_v8_0_cp_resume(adev);
5033 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
5036 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
5038 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
5040 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5042 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5043 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5045 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5046 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5047 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5048 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5049 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5050 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5051 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5052 amdgpu_ring_write(kiq_ring, 0);
5053 amdgpu_ring_write(kiq_ring, 0);
5054 amdgpu_ring_write(kiq_ring, 0);
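/* Each UNMAP_QUEUES packet is 6 DWs (header plus 5 payload DWs),
 * exactly the 6 * num_compute_rings allocated above; ACTION(1) asks
 * the KIQ to reset the queues rather than preempt them.
 */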
5056 r = amdgpu_ring_test_ring(kiq_ring);
5058 DRM_ERROR("KCQ disable failed\n");
5063 static bool gfx_v8_0_is_idle(void *handle)
5065 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5067 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
5068 || RREG32(mmGRBM_STATUS2) != 0x8)
5074 static bool gfx_v8_0_rlc_is_idle(void *handle)
5076 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5078 if (RREG32(mmGRBM_STATUS2) != 0x8)
5084 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
5087 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5089 for (i = 0; i < adev->usec_timeout; i++) {
5090 if (gfx_v8_0_rlc_is_idle(handle))
5098 static int gfx_v8_0_wait_for_idle(void *handle)
5101 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5103 for (i = 0; i < adev->usec_timeout; i++) {
5104 if (gfx_v8_0_is_idle(handle))
5112 static int gfx_v8_0_hw_fini(void *handle)
5114 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5116 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5117 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5119 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
5121 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
5123 /* disable KCQs so the CPC stops touching memory that is no longer valid */
5124 gfx_v8_0_kcq_disable(adev);
5126 if (amdgpu_sriov_vf(adev)) {
5127 pr_debug("For SRIOV client, shouldn't do anything.\n");
5130 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5131 if (!gfx_v8_0_wait_for_idle(adev))
5132 gfx_v8_0_cp_enable(adev, false);
5133 else
5134 pr_err("cp is busy, skip halt cp\n");
5135 if (!gfx_v8_0_wait_for_rlc_idle(adev))
5136 gfx_v8_0_rlc_stop(adev);
5137 else
5138 pr_err("rlc is busy, skip halt rlc\n");
5139 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5143 static int gfx_v8_0_suspend(void *handle)
5145 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5146 adev->gfx.in_suspend = true;
5147 return gfx_v8_0_hw_fini(adev);
5150 static int gfx_v8_0_resume(void *handle)
5153 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5155 r = gfx_v8_0_hw_init(adev);
5156 adev->gfx.in_suspend = false;
5160 static bool gfx_v8_0_check_soft_reset(void *handle)
5162 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5163 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5167 tmp = RREG32(mmGRBM_STATUS);
5168 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5169 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5170 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5171 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5172 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5173 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5174 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5175 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5176 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5177 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5178 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5179 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5180 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5184 tmp = RREG32(mmGRBM_STATUS2);
5185 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5186 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5187 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5189 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5190 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5191 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5192 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5194 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5196 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5198 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5199 SOFT_RESET_GRBM, 1);
5203 tmp = RREG32(mmSRBM_STATUS);
5204 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5205 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5206 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5207 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5208 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5209 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5211 if (grbm_soft_reset || srbm_soft_reset) {
5212 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5213 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5216 adev->gfx.grbm_soft_reset = 0;
5217 adev->gfx.srbm_soft_reset = 0;
5222 static int gfx_v8_0_pre_soft_reset(void *handle)
5224 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5225 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5227 if ((!adev->gfx.grbm_soft_reset) &&
5228 (!adev->gfx.srbm_soft_reset))
5231 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5232 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5235 gfx_v8_0_rlc_stop(adev);
5237 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5238 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5239 /* Disable GFX parsing/prefetching */
5240 gfx_v8_0_cp_gfx_enable(adev, false);
5242 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5243 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5244 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5245 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5248 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5249 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5251 mutex_lock(&adev->srbm_mutex);
5252 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5253 gfx_v8_0_deactivate_hqd(adev, 2);
5254 vi_srbm_select(adev, 0, 0, 0, 0);
5255 mutex_unlock(&adev->srbm_mutex);
5257 /* Disable MEC parsing/prefetching */
5258 gfx_v8_0_cp_compute_enable(adev, false);
5264 static int gfx_v8_0_soft_reset(void *handle)
5266 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5267 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5270 if ((!adev->gfx.grbm_soft_reset) &&
5271 (!adev->gfx.srbm_soft_reset))
5274 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5275 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5277 if (grbm_soft_reset || srbm_soft_reset) {
5278 tmp = RREG32(mmGMCON_DEBUG);
5279 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5280 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5281 WREG32(mmGMCON_DEBUG, tmp);
5285 if (grbm_soft_reset) {
5286 tmp = RREG32(mmGRBM_SOFT_RESET);
5287 tmp |= grbm_soft_reset;
5288 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5289 WREG32(mmGRBM_SOFT_RESET, tmp);
5290 tmp = RREG32(mmGRBM_SOFT_RESET);
5294 tmp &= ~grbm_soft_reset;
5295 WREG32(mmGRBM_SOFT_RESET, tmp);
5296 tmp = RREG32(mmGRBM_SOFT_RESET);
5299 if (srbm_soft_reset) {
5300 tmp = RREG32(mmSRBM_SOFT_RESET);
5301 tmp |= srbm_soft_reset;
5302 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5303 WREG32(mmSRBM_SOFT_RESET, tmp);
5304 tmp = RREG32(mmSRBM_SOFT_RESET);
5308 tmp &= ~srbm_soft_reset;
5309 WREG32(mmSRBM_SOFT_RESET, tmp);
5310 tmp = RREG32(mmSRBM_SOFT_RESET);
5313 if (grbm_soft_reset || srbm_soft_reset) {
5314 tmp = RREG32(mmGMCON_DEBUG);
5315 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5316 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5317 WREG32(mmGMCON_DEBUG, tmp);
5320 /* Wait a little for things to settle down */
5326 static int gfx_v8_0_post_soft_reset(void *handle)
5328 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5329 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5331 if ((!adev->gfx.grbm_soft_reset) &&
5332 (!adev->gfx.srbm_soft_reset))
5335 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5336 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5338 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5339 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5340 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5341 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5344 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5345 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5347 mutex_lock(&adev->srbm_mutex);
5348 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5349 gfx_v8_0_deactivate_hqd(adev, 2);
5350 vi_srbm_select(adev, 0, 0, 0, 0);
5351 mutex_unlock(&adev->srbm_mutex);
5353 gfx_v8_0_kiq_resume(adev);
5354 gfx_v8_0_kcq_resume(adev);
5357 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5358 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5359 gfx_v8_0_cp_gfx_resume(adev);
5361 gfx_v8_0_rlc_start(adev);
5367 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5369 * @adev: amdgpu_device pointer
5371 * Fetches a GPU clock counter snapshot.
5372 * Returns the 64-bit clock counter snapshot.
5373 */
5374 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5378 mutex_lock(&adev->gfx.gpu_clock_mutex);
5379 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5380 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5381 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5382 mutex_unlock(&adev->gfx.gpu_clock_mutex);
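/* Writing mmRLC_CAPTURE_GPU_CLOCK_COUNT presumably latches the
 * free-running counter so that the LSB/MSB halves read back as one
 * coherent 64-bit sample; the mutex serializes concurrent captures.
 */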
5386 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5388 uint32_t gds_base, uint32_t gds_size,
5389 uint32_t gws_base, uint32_t gws_size,
5390 uint32_t oa_base, uint32_t oa_size)
5393 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5394 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5395 WRITE_DATA_DST_SEL(0)));
5396 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5397 amdgpu_ring_write(ring, 0);
5398 amdgpu_ring_write(ring, gds_base);
5401 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5402 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5403 WRITE_DATA_DST_SEL(0)));
5404 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5405 amdgpu_ring_write(ring, 0);
5406 amdgpu_ring_write(ring, gds_size);
5409 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5410 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5411 WRITE_DATA_DST_SEL(0)));
5412 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5413 amdgpu_ring_write(ring, 0);
5414 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5417 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5418 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5419 WRITE_DATA_DST_SEL(0)));
5420 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5421 amdgpu_ring_write(ring, 0);
5422 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
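/* The OA (ordered append) allocation is a contiguous bitmask:
 * (1 << (oa_size + oa_base)) - (1 << oa_base) sets oa_size bits
 * starting at oa_base; e.g. oa_base = 4, oa_size = 2 yields 0x30.
 */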
5425 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5427 WREG32(mmSQ_IND_INDEX,
5428 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5429 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5430 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5431 (SQ_IND_INDEX__FORCE_READ_MASK));
5432 return RREG32(mmSQ_IND_DATA);
5435 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5436 uint32_t wave, uint32_t thread,
5437 uint32_t regno, uint32_t num, uint32_t *out)
5439 WREG32(mmSQ_IND_INDEX,
5440 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5441 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5442 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5443 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5444 (SQ_IND_INDEX__FORCE_READ_MASK) |
5445 (SQ_IND_INDEX__AUTO_INCR_MASK));
5446 while (num--)
5447 *(out++) = RREG32(mmSQ_IND_DATA);
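/* With AUTO_INCR set, SQ_IND_INDEX advances after each read of
 * SQ_IND_DATA, so the loop above streams registers regno through
 * regno + num - 1 without reprogramming the index register.
 */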
5450 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5452 /* type 0 wave data */
5453 dst[(*no_fields)++] = 0;
5454 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5455 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5456 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5457 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5458 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5459 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5460 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5461 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5462 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5463 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5464 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5465 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5466 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5467 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5468 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5469 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5470 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5471 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5474 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5475 uint32_t wave, uint32_t start,
5476 uint32_t size, uint32_t *dst)
5479 adev, simd, wave, 0,
5480 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5484 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5485 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5486 .select_se_sh = &gfx_v8_0_select_se_sh,
5487 .read_wave_data = &gfx_v8_0_read_wave_data,
5488 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5489 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5492 static int gfx_v8_0_early_init(void *handle)
5494 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5496 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5497 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5498 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5499 gfx_v8_0_set_ring_funcs(adev);
5500 gfx_v8_0_set_irq_funcs(adev);
5501 gfx_v8_0_set_gds_init(adev);
5502 gfx_v8_0_set_rlc_funcs(adev);
5507 static int gfx_v8_0_late_init(void *handle)
5509 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5512 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5516 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5520 /* requires IBs so do in late init after IB pool is initialized */
5521 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5525 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5527 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5531 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5534 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5542 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5545 if (((adev->asic_type == CHIP_POLARIS11) ||
5546 (adev->asic_type == CHIP_POLARIS12) ||
5547 (adev->asic_type == CHIP_VEGAM)) &&
5548 adev->powerplay.pp_funcs->set_powergating_by_smu)
5549 /* Send msg to SMU via Powerplay */
5550 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5552 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5555 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5558 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5561 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5564 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5567 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5570 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5573 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5576 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5578 /* Read any GFX register to wake up GFX. */
5580 RREG32(mmDB_RENDER_CONTROL);
5583 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5586 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5587 cz_enable_gfx_cg_power_gating(adev, true);
5588 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5589 cz_enable_gfx_pipeline_power_gating(adev, true);
5591 cz_enable_gfx_cg_power_gating(adev, false);
5592 cz_enable_gfx_pipeline_power_gating(adev, false);
5596 static int gfx_v8_0_set_powergating_state(void *handle,
5597 enum amd_powergating_state state)
5599 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5600 bool enable = (state == AMD_PG_STATE_GATE);
5602 if (amdgpu_sriov_vf(adev))
5605 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5606 AMD_PG_SUPPORT_RLC_SMU_HS |
5608 AMD_PG_SUPPORT_GFX_DMG))
5609 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5610 switch (adev->asic_type) {
5614 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5615 cz_enable_sck_slow_down_on_power_up(adev, true);
5616 cz_enable_sck_slow_down_on_power_down(adev, true);
5618 cz_enable_sck_slow_down_on_power_up(adev, false);
5619 cz_enable_sck_slow_down_on_power_down(adev, false);
5621 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5622 cz_enable_cp_power_gating(adev, true);
5624 cz_enable_cp_power_gating(adev, false);
5626 cz_update_gfx_cg_power_gating(adev, enable);
5628 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5629 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5631 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5633 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5634 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5636 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5638 case CHIP_POLARIS11:
5639 case CHIP_POLARIS12:
5641 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5642 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5644 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5646 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5647 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5649 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5651 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5652 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5654 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5659 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5660 AMD_PG_SUPPORT_RLC_SMU_HS |
5662 AMD_PG_SUPPORT_GFX_DMG))
5663 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5667 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5669 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5672 if (amdgpu_sriov_vf(adev))
5675 /* AMD_CG_SUPPORT_GFX_MGCG */
5676 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5677 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5678 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5680 /* AMD_CG_SUPPORT_GFX_CGCG */
5681 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5682 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5683 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5685 /* AMD_CG_SUPPORT_GFX_CGLS */
5686 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5687 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5689 /* AMD_CG_SUPPORT_GFX_CGTS */
5690 data = RREG32(mmCGTS_SM_CTRL_REG);
5691 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5692 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5694 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5695 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5696 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5698 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5699 data = RREG32(mmRLC_MEM_SLP_CNTL);
5700 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5701 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5703 /* AMD_CG_SUPPORT_GFX_CP_LS */
5704 data = RREG32(mmCP_MEM_SLP_CNTL);
5705 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5706 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5709 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5710 uint32_t reg_addr, uint32_t cmd)
5714 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5716 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5717 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5719 data = RREG32(mmRLC_SERDES_WR_CTRL);
5720 if (adev->asic_type == CHIP_STONEY)
5721 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5722 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5723 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5724 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5725 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5726 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5727 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5728 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5729 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5731 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5732 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5733 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5734 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5735 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5736 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5737 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5738 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5739 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5740 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5741 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5742 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5743 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5744 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5745 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5747 WREG32(mmRLC_SERDES_WR_CTRL, data);
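/* With the select and master masks broadcast to 0xffffffff, one serdes
 * write appears to reach the BPM logic of every SE/SH/CU at once: the
 * command goes in BPM_DATA and the BPM register id in REG_ADDR, which
 * is how SET_BPM_SERDES_CMD/CLE_BPM_SERDES_CMD are delivered by the
 * clockgating code.
 */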
5750 #define MSG_ENTER_RLC_SAFE_MODE 1
5751 #define MSG_EXIT_RLC_SAFE_MODE 0
5752 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5753 #define RLC_GPR_REG2__REQ__SHIFT 0
5754 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5755 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5757 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5762 data = RREG32(mmRLC_CNTL);
5763 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5764 return;
5766 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5767 data |= RLC_SAFE_MODE__CMD_MASK;
5768 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5769 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5770 WREG32(mmRLC_SAFE_MODE, data);
5772 for (i = 0; i < adev->usec_timeout; i++) {
5773 if ((RREG32(mmRLC_GPM_STAT) &
5774 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5775 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5776 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5777 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5782 for (i = 0; i < adev->usec_timeout; i++) {
5783 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5787 adev->gfx.rlc.in_safe_mode = true;
5791 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5796 data = RREG32(mmRLC_CNTL);
5797 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5798 return;
5800 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5801 if (adev->gfx.rlc.in_safe_mode) {
5802 data |= RLC_SAFE_MODE__CMD_MASK;
5803 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5804 WREG32(mmRLC_SAFE_MODE, data);
5805 adev->gfx.rlc.in_safe_mode = false;
5809 for (i = 0; i < adev->usec_timeout; i++) {
5810 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5816 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5817 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5818 .exit_safe_mode = iceland_exit_rlc_safe_mode
5821 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5824 uint32_t temp, data;
5826 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5828 /* It is disabled by HW by default */
5829 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5830 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5831 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5832 /* 1 - RLC memory Light sleep */
5833 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5835 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5836 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5839 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5840 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5841 if (adev->flags & AMD_IS_APU)
5842 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5843 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5844 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5846 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5847 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5848 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5849 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5852 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5854 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5855 gfx_v8_0_wait_for_rlc_serdes(adev);
5857 /* 5 - clear mgcg override */
5858 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5860 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5861 /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5862 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5863 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5864 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5865 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5866 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5867 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5868 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5869 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5870 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5871 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5873 WREG32(mmCGTS_SM_CTRL_REG, data);
5877 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5878 gfx_v8_0_wait_for_rlc_serdes(adev);
5880 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5881 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5882 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5883 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5884 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5885 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5887 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5889 /* 2 - disable MGLS in RLC */
5890 data = RREG32(mmRLC_MEM_SLP_CNTL);
5891 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5892 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5893 WREG32(mmRLC_MEM_SLP_CNTL, data);
5896 /* 3 - disable MGLS in CP */
5897 data = RREG32(mmCP_MEM_SLP_CNTL);
5898 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5899 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5900 WREG32(mmCP_MEM_SLP_CNTL, data);
5903 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5904 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5905 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5906 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5908 WREG32(mmCGTS_SM_CTRL_REG, data);
5910 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5911 gfx_v8_0_wait_for_rlc_serdes(adev);
5913 /* 6 - set mgcg override */
5914 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5918 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5919 gfx_v8_0_wait_for_rlc_serdes(adev);
5922 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5925 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5928 uint32_t temp, temp1, data, data1;
5930 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5932 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5934 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5935 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5936 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5938 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5940 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5941 gfx_v8_0_wait_for_rlc_serdes(adev);
5943 /* 2 - clear cgcg override */
5944 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5946 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5947 gfx_v8_0_wait_for_rlc_serdes(adev);
5949 /* 3 - write cmd to set CGLS */
5950 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5952 /* 4 - enable cgcg */
5953 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5955 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5957 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5959 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5960 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5963 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5965 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5969 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5971 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5972 * Cmp_busy/GFX_Idle interrupts
5973 */
5974 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5976 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5977 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5980 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5981 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5982 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5984 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5986 /* read gfx register to wake up cgcg */
5987 RREG32(mmCB_CGTT_SCLK_CTRL);
5988 RREG32(mmCB_CGTT_SCLK_CTRL);
5989 RREG32(mmCB_CGTT_SCLK_CTRL);
5990 RREG32(mmCB_CGTT_SCLK_CTRL);
5992 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5993 gfx_v8_0_wait_for_rlc_serdes(adev);
5995 /* write cmd to set CGCG Override */
5996 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5998 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5999 gfx_v8_0_wait_for_rlc_serdes(adev);
6001 /* write cmd to Clear CGLS */
6002 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6004 /* disable cgcg, cgls should be disabled too. */
6005 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6006 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6008 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6009 /* enable interrupts again for PG */
6010 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6013 gfx_v8_0_wait_for_rlc_serdes(adev);
6015 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6017 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6021 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6022 * === MGCG + MGLS + TS(CG/LS) ===
6024 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6025 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6027 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6028 * === CGCG + CGLS ===
6030 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6031 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6036 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6037 enum amd_clockgating_state state)
6039 uint32_t msg_id, pp_state = 0;
6040 uint32_t pp_support_state = 0;
6042 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6043 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6044 pp_support_state = PP_STATE_SUPPORT_LS;
6045 pp_state = PP_STATE_LS;
6047 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6048 pp_support_state |= PP_STATE_SUPPORT_CG;
6049 pp_state |= PP_STATE_CG;
6051 if (state == AMD_CG_STATE_UNGATE)
6054 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6058 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6059 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6062 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6063 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6064 pp_support_state = PP_STATE_SUPPORT_LS;
6065 pp_state = PP_STATE_LS;
6068 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6069 pp_support_state |= PP_STATE_SUPPORT_CG;
6070 pp_state |= PP_STATE_CG;
6073 if (state == AMD_CG_STATE_UNGATE)
6076 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6080 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6081 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6087 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6088 enum amd_clockgating_state state)
6091 uint32_t msg_id, pp_state = 0;
6092 uint32_t pp_support_state = 0;
6094 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6095 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6096 pp_support_state = PP_STATE_SUPPORT_LS;
6097 pp_state = PP_STATE_LS;
6099 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6100 pp_support_state |= PP_STATE_SUPPORT_CG;
6101 pp_state |= PP_STATE_CG;
6103 if (state == AMD_CG_STATE_UNGATE)
6106 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6110 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6111 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6114 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6115 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6116 pp_support_state = PP_STATE_SUPPORT_LS;
6117 pp_state = PP_STATE_LS;
6119 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6120 pp_support_state |= PP_STATE_SUPPORT_CG;
6121 pp_state |= PP_STATE_CG;
6123 if (state == AMD_CG_STATE_UNGATE)
6126 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6130 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6131 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6134 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6135 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6136 pp_support_state = PP_STATE_SUPPORT_LS;
6137 pp_state = PP_STATE_LS;
6140 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6141 pp_support_state |= PP_STATE_SUPPORT_CG;
6142 pp_state |= PP_STATE_CG;
6145 if (state == AMD_CG_STATE_UNGATE)
6148 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6152 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6153 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6156 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6157 pp_support_state = PP_STATE_SUPPORT_LS;
6159 if (state == AMD_CG_STATE_UNGATE)
6162 pp_state = PP_STATE_LS;
6164 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6168 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6169 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6172 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6173 pp_support_state = PP_STATE_SUPPORT_LS;
6175 if (state == AMD_CG_STATE_UNGATE)
6178 pp_state = PP_STATE_LS;
6179 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6183 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6184 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6190 static int gfx_v8_0_set_clockgating_state(void *handle,
6191 enum amd_clockgating_state state)
6193 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6195 if (amdgpu_sriov_vf(adev))
6198 switch (adev->asic_type) {
6202 gfx_v8_0_update_gfx_clock_gating(adev,
6203 state == AMD_CG_STATE_GATE);
6206 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6208 case CHIP_POLARIS10:
6209 case CHIP_POLARIS11:
6210 case CHIP_POLARIS12:
6212 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6220 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6222 return ring->adev->wb.wb[ring->rptr_offs];
6225 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6227 struct amdgpu_device *adev = ring->adev;
6229 if (ring->use_doorbell)
6230 /* XXX check if swapping is necessary on BE */
6231 return ring->adev->wb.wb[ring->wptr_offs];
6233 return RREG32(mmCP_RB0_WPTR);
6236 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6238 struct amdgpu_device *adev = ring->adev;
6240 if (ring->use_doorbell) {
6241 /* XXX check if swapping is necessary on BE */
6242 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6243 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6245 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6246 (void)RREG32(mmCP_RB0_WPTR);
6250 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6252 u32 ref_and_mask, reg_mem_engine;
6254 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6255 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6258 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6261 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6268 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6269 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6272 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6273 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6274 WAIT_REG_MEM_FUNCTION(3) | /* == */
6276 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6277 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6278 amdgpu_ring_write(ring, ref_and_mask);
6279 amdgpu_ring_write(ring, ref_and_mask);
6280 amdgpu_ring_write(ring, 0x20); /* poll interval */
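/* WAIT_REG_MEM with OPERATION(1) is a write-then-poll: the CP writes
 * ref_and_mask to GPU_HDP_FLUSH_REQ and then polls GPU_HDP_FLUSH_DONE
 * every 0x20 intervals until the masked value equals it, i.e. until
 * the HDP flush for this engine's bit has completed.
 */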
6283 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6285 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6286 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6289 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6290 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6294 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6295 struct amdgpu_ib *ib,
6296 unsigned vmid, bool ctx_switch)
6298 u32 header, control = 0;
6300 if (ib->flags & AMDGPU_IB_FLAG_CE)
6301 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6303 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6305 control |= ib->length_dw | (vmid << 24);
6307 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6308 control |= INDIRECT_BUFFER_PRE_ENB(1);
6310 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6311 gfx_v8_0_ring_emit_de_meta(ring);
6314 amdgpu_ring_write(ring, header);
6315 amdgpu_ring_write(ring,
6319 (ib->gpu_addr & 0xFFFFFFFC));
6320 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6321 amdgpu_ring_write(ring, control);
6324 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6325 struct amdgpu_ib *ib,
6326 unsigned vmid, bool ctx_switch)
6328 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6330 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6331 amdgpu_ring_write(ring,
6335 (ib->gpu_addr & 0xFFFFFFFC));
6336 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6337 amdgpu_ring_write(ring, control);
6340 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6341 u64 seq, unsigned flags)
6343 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6344 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6346 /* EVENT_WRITE_EOP - flush caches, send int */
6347 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6348 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6350 EOP_TC_WB_ACTION_EN |
6351 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6353 amdgpu_ring_write(ring, addr & 0xfffffffc);
6354 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6355 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6356 amdgpu_ring_write(ring, lower_32_bits(seq));
6357 amdgpu_ring_write(ring, upper_32_bits(seq));
6361 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6363 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6364 uint32_t seq = ring->fence_drv.sync_seq;
6365 uint64_t addr = ring->fence_drv.gpu_addr;
6367 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6368 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6369 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6370 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6371 amdgpu_ring_write(ring, addr & 0xfffffffc);
6372 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6373 amdgpu_ring_write(ring, seq);
6374 amdgpu_ring_write(ring, 0xffffffff);
6375 amdgpu_ring_write(ring, 4); /* poll interval */
6378 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6379 unsigned vmid, uint64_t pd_addr)
6381 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6383 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6385 /* wait for the invalidate to complete */
6386 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6387 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6388 WAIT_REG_MEM_FUNCTION(0) | /* always */
6389 WAIT_REG_MEM_ENGINE(0))); /* me */
6390 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6391 amdgpu_ring_write(ring, 0);
6392 amdgpu_ring_write(ring, 0); /* ref */
6393 amdgpu_ring_write(ring, 0); /* mask */
6394 amdgpu_ring_write(ring, 0x20); /* poll interval */
6396 /* compute doesn't have PFP */
6397 if (usepfp) {
6398 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6399 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6400 amdgpu_ring_write(ring, 0x0);
6404 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6406 return ring->adev->wb.wb[ring->wptr_offs];
6409 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6411 struct amdgpu_device *adev = ring->adev;
6413 /* XXX check if swapping is necessary on BE */
6414 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6415 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6418 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6421 struct amdgpu_device *adev = ring->adev;
6422 int pipe_num, tmp, reg;
6423 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6425 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6427 /* first me only has 2 entries, GFX and HP3D */
6428 if (ring->me > 0)
6429 pipe_num -= 2;
6431 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6432 tmp = RREG32(reg);
6433 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6434 WREG32(reg, tmp);
6437 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6438 struct amdgpu_ring *ring,
6443 struct amdgpu_ring *iring;
6445 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6446 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6448 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6450 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6452 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6453 /* Clear all reservations - everyone reacquires all resources */
6454 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6455 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6458 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6459 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6462 /* Lower all pipes without a current reservation */
6463 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6464 iring = &adev->gfx.gfx_ring[i];
6465 pipe = amdgpu_gfx_queue_to_bit(adev,
6469 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6470 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6473 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6474 iring = &adev->gfx.compute_ring[i];
6475 pipe = amdgpu_gfx_queue_to_bit(adev,
6479 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6480 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6484 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6487 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6488 struct amdgpu_ring *ring,
6491 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6492 uint32_t queue_priority = acquire ? 0xf : 0x0;
6494 mutex_lock(&adev->srbm_mutex);
6495 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6497 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6498 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6500 vi_srbm_select(adev, 0, 0, 0, 0);
6501 mutex_unlock(&adev->srbm_mutex);
6503 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6504 enum drm_sched_priority priority)
6506 struct amdgpu_device *adev = ring->adev;
6507 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6509 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6512 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6513 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6516 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6520 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6521 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6523 /* RELEASE_MEM - flush caches, send int */
6524 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6525 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6527 EOP_TC_WB_ACTION_EN |
6528 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6530 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6531 amdgpu_ring_write(ring, addr & 0xfffffffc);
6532 amdgpu_ring_write(ring, upper_32_bits(addr));
6533 amdgpu_ring_write(ring, lower_32_bits(seq));
6534 amdgpu_ring_write(ring, upper_32_bits(seq));
6537 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6538 u64 seq, unsigned int flags)
6540 /* we only allocate 32 bits for each seq wb address */
6541 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6543 /* write fence seq to the "addr" */
6544 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6545 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6546 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6547 amdgpu_ring_write(ring, lower_32_bits(addr));
6548 amdgpu_ring_write(ring, upper_32_bits(addr));
6549 amdgpu_ring_write(ring, lower_32_bits(seq));
6551 if (flags & AMDGPU_FENCE_FLAG_INT) {
6552 /* set register to trigger INT */
6553 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6554 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6555 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6556 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6557 amdgpu_ring_write(ring, 0);
6558 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6562 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6564 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6565 amdgpu_ring_write(ring, 0);
6568 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6572 if (amdgpu_sriov_vf(ring->adev))
6573 gfx_v8_0_ring_emit_ce_meta(ring);
6575 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6576 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6577 gfx_v8_0_ring_emit_vgt_flush(ring);
6578 /* set load_global_config & load_global_uconfig */
6580 /* set load_cs_sh_regs */
6582 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6585 /* set load_ce_ram if preamble presented */
6586 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6589 /* still load_ce_ram if this is the first time the preamble is presented,
6590 * even though no context switch happens.
6591 */
6592 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6596 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6597 amdgpu_ring_write(ring, dw2);
6598 amdgpu_ring_write(ring, 0);
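/*
 * Conditional-execution pair: init_cond_exec emits a COND_EXEC packet with
 * a dummy DW count (0x55aa55aa) and returns its offset in the ring;
 * patch_cond_exec below rewrites that DW in place with the real number of
 * DWs to skip, accounting for ring wrap-around.
 */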
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr == 0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

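/*
 * Register read through the ring (SR-IOV path): COPY_DATA moves the
 * register value into the writeback page at reg_val_offs, where the CPU
 * picks it up once the submission has executed.
 */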
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |		/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

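/*
 * Best-effort soft recovery: SQ_CMD with CMD 0x03 and CHECK_VMID set
 * appears to ask the SQ to kill the in-flight waves of the given VMID,
 * hopefully unblocking a hung ring without a full GPU reset.
 */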
static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */
	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;
	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

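/*
 * Note the inverted sense here: SQ_INTERRUPT_MSG_CTRL.STALL = 1 stalls SQ
 * interrupt messages, so DISABLE maps to 1 and ENABLE maps to 0 below.
 */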
static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;
	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}

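/*
 * Decode one SQ interrupt word. ENCODING 0 is the "auto" (general purpose)
 * flavour, 1 an instruction trap, 2 an EDC/ECC error; for the last two the
 * EDC source is only readable from SQ_EDC_INFO when we run in task context
 * (i.e. from the work handler rather than the ISR), hence the in_task()
 * check below.
 */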
static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE));
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from ISR
		 * or from BH in which case we can access SQ_EDC_INFO
		 * instance.
		 */
		if (in_task()) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO("SQ %s detected: "
			 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d, "
			 "trap %s, sq_edc_info.source %s.\n",
			 type, se_id, sh_id, cu_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable");
		break;
	default:
		DRM_ERROR("SQ invalid encoding type\n");
		break;
	}
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * BH. If previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jump to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

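/*
 * Apply a user-requested CU disable mask (parsed from the amdgpu
 * "disable_cu" option by amdgpu_gfx_parse_disable_cu()) to the shader
 * array currently selected via GRBM.
 */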
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

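/*
 * Walk all shader engines/arrays and fill adev->gfx.cu_info: per-array
 * active-CU bitmaps, plus an "always on" (AO) mask that keeps the first
 * ao_cu_num active CUs of each array marked; the AO mask packs 8 bits per
 * array and 16 per SE, hence the (i * 16 + j * 8) shift below.
 */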
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

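/*
 * DE counterpart of gfx_v8_0_ring_emit_ce_meta: write the DE metadata,
 * including the GDS backup address (stashed 4KB into the CSA), so that a
 * preempted context can presumably be restored by the hypervisor/CP.
 */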
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}