/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi_structs.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
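
/*
 * The macros above shift a field value into its slot within the
 * GB_TILE_MODE0/GB_MACROTILE_MODE0 register layout, so a complete
 * tile-mode word is composed by OR-ing fields together.  A minimal
 * sketch (the field values here are illustrative only, not taken from
 * any real tiling table):
 *
 *	u32 mode = ARRAY_MODE(1) | PIPE_CONFIG(2) | TILE_SPLIT(3);
 *
 * The *__SHIFT constants come from gca/gfx_8_0_sh_mask.h.
 */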

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0

/* BPM Register Address */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
};

#define RLC_FormatDirectRegListLength 14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
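
/*
 * Each MODULE_FIRMWARE() entry above declares a firmware blob the driver
 * may request from /lib/firmware at runtime, so initramfs tooling can
 * bundle it.  The Polaris parts list both "_2.bin" and plain variants:
 * gfx_v8_0_init_microcode() below tries the newer "_2" image first and
 * falls back to the original name if it is absent.
 */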

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
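
/*
 * The golden-register tables below are flat triplets of
 * { register, AND mask, OR value }.  amdgpu_device_program_register_sequence()
 * walks them three dwords at a time and applies, roughly:
 *
 *	if (and_mask == 0xffffffff) {
 *		tmp = or_value;
 *	} else {
 *		tmp = RREG32(reg);
 *		tmp &= ~and_mask;
 *		tmp |= or_value;
 *	}
 *	WREG32(reg, tmp);
 *
 * (a sketch of the intended semantics; the authoritative loop lives in
 * amdgpu_device.c)
 */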

static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
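
/*
 * gfx_v8_0_init_golden_registers() applies the per-ASIC tables defined
 * above: clock-gating (mgcg_cgcg) defaults first where such a table
 * exists, then chip-specific workaround settings, then the common
 * raster and GB_ADDR_CONFIG state.
 */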
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
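
/*
 * The ring test below checks basic CP liveness without touching memory:
 * seed a scratch register with 0xCAFEDEAD from the CPU, ask the ring to
 * overwrite it with 0xDEADBEEF via a SET_UCONFIG_REG packet, then poll
 * until the new value lands or adev->usec_timeout microseconds elapse.
 */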
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
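
/*
 * The IB test goes a step further than the ring test above: the indirect
 * buffer's WRITE_DATA packet targets a write-back (wb) slot in system
 * memory rather than a register, which also exercises the fence path via
 * dma_fence_wait_timeout().
 */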
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version 46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else {
		adev->virt.chained_ib_support = false;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
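
/*
 * gfx_v8_0_get_csb_buffer() serializes the clear-state defaults from
 * vi_cs_data into a PM4 stream: PREAMBLE_BEGIN_CLEAR_STATE, a
 * CONTEXT_CONTROL, one SET_CONTEXT_REG burst per register extent, the
 * raster config, then PREAMBLE_END_CLEAR_STATE and CLEAR_STATE.  The
 * RLC replays this buffer to bring context registers to known values.
 */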
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
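
/*
 * Pre-assembled GCN (gfx8) machine code: a run of v_mov_b32 VOP1
 * encodings that writes every VGPR the dispatch below allocates,
 * terminated by s_barrier (0xbf8a0000) and s_endpgm (0xbf810000).
 * Touching each register once is the point of the EDC workaround;
 * uninitialized GPRs could otherwise report spurious ECC errors.
 * The sgpr_init_compute_shader that follows does the same for SGPRs
 * using s_mov_b32 encodings.
 */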

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1515 static const u32 vgpr_init_regs[] =
1517 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1518 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1519 mmCOMPUTE_NUM_THREAD_X, 256*4,
1520 mmCOMPUTE_NUM_THREAD_Y, 1,
1521 mmCOMPUTE_NUM_THREAD_Z, 1,
1522 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1523 mmCOMPUTE_PGM_RSRC2, 20,
1524 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1525 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1526 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1527 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1528 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1529 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1530 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1531 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1532 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1533 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1536 static const u32 sgpr1_init_regs[] =
1538 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1539 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1540 mmCOMPUTE_NUM_THREAD_X, 256*5,
1541 mmCOMPUTE_NUM_THREAD_Y, 1,
1542 mmCOMPUTE_NUM_THREAD_Z, 1,
1543 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1544 mmCOMPUTE_PGM_RSRC2, 20,
1545 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1546 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1547 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1548 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1549 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1550 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1551 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1552 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1553 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1554 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1557 static const u32 sgpr2_init_regs[] =
1559 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1560 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1561 mmCOMPUTE_NUM_THREAD_X, 256*5,
1562 mmCOMPUTE_NUM_THREAD_Y, 1,
1563 mmCOMPUTE_NUM_THREAD_Z, 1,
1564 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 SGPRs) */
1565 mmCOMPUTE_PGM_RSRC2, 20,
1566 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1567 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1568 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1569 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1570 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1571 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1572 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1573 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1574 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1575 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
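/*
 * EDC SEC/DED error counters; simply reading these registers clears
 * the counts (see the RREG32 loop at the end of the workaround).
 */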
1578 static const u32 sec_ded_counter_registers[] =
1581 mmCPC_EDC_SCRATCH_CNT,
1582 mmCPC_EDC_UCODE_CNT,
1589 mmDC_EDC_CSINVOC_CNT,
1590 mmDC_EDC_RESTORE_CNT,
1596 mmSQC_ATC_EDC_GATCL1_CNT,
1602 mmTCP_ATC_EDC_GATCL1_CNT,
1607 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1609 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1610 struct amdgpu_ib ib;
1611 struct dma_fence *f = NULL;
1614 unsigned total_size, vgpr_offset, sgpr_offset;
1617 /* only supported on CZ */
1618 if (adev->asic_type != CHIP_CARRIZO)
1621 /* bail if the compute ring is not ready */
1625 tmp = RREG32(mmGB_EDC_MODE);
1626 WREG32(mmGB_EDC_MODE, 0);
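/*
 * EDC stays disabled while the init shaders run; the saved value is
 * written back below with DED_MODE and PROP_FED set once both
 * dispatches have completed.
 */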
1629 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1631 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1633 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
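/*
 * Each register pair above becomes a 3-dword SET_SH_REG write; +4
 * dwords for COMPUTE_PGM_LO/HI, +5 for DISPATCH_DIRECT, +2 for the
 * EVENT_WRITE flush; the trailing *4 converts dwords to bytes.
 */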
1634 total_size = ALIGN(total_size, 256);
1635 vgpr_offset = total_size;
1636 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1637 sgpr_offset = total_size;
1638 total_size += sizeof(sgpr_init_compute_shader);
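/*
 * A single IB holds everything: the packets start at offset 0 and the
 * two shaders live at the 256-byte-aligned offsets computed above;
 * COMPUTE_PGM_LO/HI below point back into the same buffer.
 */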
1640 /* allocate an indirect buffer to put the commands in */
1641 memset(&ib, 0, sizeof(ib));
1642 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1644 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1648 /* load the compute shaders */
1649 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1650 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1652 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1653 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1655 /* init the ib length to 0 */
1659 /* write the register state for the compute dispatch */
1660 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1661 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1662 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1663 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1665 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1666 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1667 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1668 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1669 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1670 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1672 /* write dispatch packet */
1673 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1674 ib.ptr[ib.length_dw++] = 8; /* x */
1675 ib.ptr[ib.length_dw++] = 1; /* y */
1676 ib.ptr[ib.length_dw++] = 1; /* z */
1677 ib.ptr[ib.length_dw++] =
1678 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1680 /* write CS partial flush packet */
1681 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1682 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1685 /* write the register state for the compute dispatch */
1686 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1687 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1688 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1689 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1691 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1692 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1693 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1694 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1695 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1696 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1698 /* write dispatch packet */
1699 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1700 ib.ptr[ib.length_dw++] = 8; /* x */
1701 ib.ptr[ib.length_dw++] = 1; /* y */
1702 ib.ptr[ib.length_dw++] = 1; /* z */
1703 ib.ptr[ib.length_dw++] =
1704 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1706 /* write CS partial flush packet */
1707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1708 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1711 /* write the register state for the compute dispatch */
1712 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1714 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1715 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1717 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1718 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1719 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1720 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1721 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1722 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1724 /* write dispatch packet */
1725 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1726 ib.ptr[ib.length_dw++] = 8; /* x */
1727 ib.ptr[ib.length_dw++] = 1; /* y */
1728 ib.ptr[ib.length_dw++] = 1; /* z */
1729 ib.ptr[ib.length_dw++] =
1730 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1732 /* write CS partial flush packet */
1733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1734 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1736 /* schedule the IB on the ring */
1737 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1739 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1743 /* wait for the GPU to finish processing the IB */
1744 r = dma_fence_wait(f, false);
1746 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1750 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1751 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1752 WREG32(mmGB_EDC_MODE, tmp);
1754 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1755 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1756 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1759 /* read back registers to clear the counters */
1760 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1761 RREG32(sec_ded_counter_registers[i]);
1764 amdgpu_ib_free(adev, &ib, NULL);
1770 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1773 u32 mc_shared_chmap, mc_arb_ramcfg;
1774 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1778 switch (adev->asic_type) {
1780 adev->gfx.config.max_shader_engines = 1;
1781 adev->gfx.config.max_tile_pipes = 2;
1782 adev->gfx.config.max_cu_per_sh = 6;
1783 adev->gfx.config.max_sh_per_se = 1;
1784 adev->gfx.config.max_backends_per_se = 2;
1785 adev->gfx.config.max_texture_channel_caches = 2;
1786 adev->gfx.config.max_gprs = 256;
1787 adev->gfx.config.max_gs_threads = 32;
1788 adev->gfx.config.max_hw_contexts = 8;
1790 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1791 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1792 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1793 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1794 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1797 adev->gfx.config.max_shader_engines = 4;
1798 adev->gfx.config.max_tile_pipes = 16;
1799 adev->gfx.config.max_cu_per_sh = 16;
1800 adev->gfx.config.max_sh_per_se = 1;
1801 adev->gfx.config.max_backends_per_se = 4;
1802 adev->gfx.config.max_texture_channel_caches = 16;
1803 adev->gfx.config.max_gprs = 256;
1804 adev->gfx.config.max_gs_threads = 32;
1805 adev->gfx.config.max_hw_contexts = 8;
1807 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1808 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1809 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1810 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1811 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1813 case CHIP_POLARIS11:
1814 case CHIP_POLARIS12:
1815 ret = amdgpu_atombios_get_gfx_info(adev);
1818 adev->gfx.config.max_gprs = 256;
1819 adev->gfx.config.max_gs_threads = 32;
1820 adev->gfx.config.max_hw_contexts = 8;
1822 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1823 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1824 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1825 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1826 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1828 case CHIP_POLARIS10:
1830 ret = amdgpu_atombios_get_gfx_info(adev);
1833 adev->gfx.config.max_gprs = 256;
1834 adev->gfx.config.max_gs_threads = 32;
1835 adev->gfx.config.max_hw_contexts = 8;
1837 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1838 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1839 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1840 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1841 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1844 adev->gfx.config.max_shader_engines = 4;
1845 adev->gfx.config.max_tile_pipes = 8;
1846 adev->gfx.config.max_cu_per_sh = 8;
1847 adev->gfx.config.max_sh_per_se = 1;
1848 adev->gfx.config.max_backends_per_se = 2;
1849 adev->gfx.config.max_texture_channel_caches = 8;
1850 adev->gfx.config.max_gprs = 256;
1851 adev->gfx.config.max_gs_threads = 32;
1852 adev->gfx.config.max_hw_contexts = 8;
1854 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1855 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1856 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1857 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1858 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1861 adev->gfx.config.max_shader_engines = 1;
1862 adev->gfx.config.max_tile_pipes = 2;
1863 adev->gfx.config.max_sh_per_se = 1;
1864 adev->gfx.config.max_backends_per_se = 2;
1865 adev->gfx.config.max_cu_per_sh = 8;
1866 adev->gfx.config.max_texture_channel_caches = 2;
1867 adev->gfx.config.max_gprs = 256;
1868 adev->gfx.config.max_gs_threads = 32;
1869 adev->gfx.config.max_hw_contexts = 8;
1871 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1872 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1873 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1874 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1875 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1878 adev->gfx.config.max_shader_engines = 1;
1879 adev->gfx.config.max_tile_pipes = 2;
1880 adev->gfx.config.max_sh_per_se = 1;
1881 adev->gfx.config.max_backends_per_se = 1;
1882 adev->gfx.config.max_cu_per_sh = 3;
1883 adev->gfx.config.max_texture_channel_caches = 2;
1884 adev->gfx.config.max_gprs = 256;
1885 adev->gfx.config.max_gs_threads = 16;
1886 adev->gfx.config.max_hw_contexts = 8;
1888 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1889 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1890 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1891 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1892 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1895 adev->gfx.config.max_shader_engines = 2;
1896 adev->gfx.config.max_tile_pipes = 4;
1897 adev->gfx.config.max_cu_per_sh = 2;
1898 adev->gfx.config.max_sh_per_se = 1;
1899 adev->gfx.config.max_backends_per_se = 2;
1900 adev->gfx.config.max_texture_channel_caches = 4;
1901 adev->gfx.config.max_gprs = 256;
1902 adev->gfx.config.max_gs_threads = 32;
1903 adev->gfx.config.max_hw_contexts = 8;
1905 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1906 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1907 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1908 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1909 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1913 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1914 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1915 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1917 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1918 adev->gfx.config.mem_max_burst_length_bytes = 256;
1919 if (adev->flags & AMD_IS_APU) {
1920 /* Get memory bank mapping mode. */
1921 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1922 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1923 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1925 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1926 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1927 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1929 /* Validate settings in case only one DIMM is installed. */
1930 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1931 dimm00_addr_map = 0;
1932 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1933 dimm01_addr_map = 0;
1934 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1935 dimm10_addr_map = 0;
1936 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1937 dimm11_addr_map = 0;
1939 /* If the DIMM address map is 8GB, the ROW size should be 2KB, otherwise 1KB. */
1940 /* If ROW size(DIMM1) != ROW size(DIMM0), the ROW size should be the larger one. */
1941 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1942 adev->gfx.config.mem_row_size_in_kb = 2;
1944 adev->gfx.config.mem_row_size_in_kb = 1;
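/*
 * dGPU path: row size = 4 bytes/column * 2^(8 + NOOFCOLS) columns,
 * converted to KB and capped at 4KB below.
 */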
1946 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1947 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1948 if (adev->gfx.config.mem_row_size_in_kb > 4)
1949 adev->gfx.config.mem_row_size_in_kb = 4;
1952 adev->gfx.config.shader_engine_tile_size = 32;
1953 adev->gfx.config.num_gpus = 1;
1954 adev->gfx.config.multi_gpu_tile_size = 64;
1956 /* fix up row size */
1957 switch (adev->gfx.config.mem_row_size_in_kb) {
1960 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1963 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1966 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1969 adev->gfx.config.gb_addr_config = gb_addr_config;
1974 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1975 int mec, int pipe, int queue)
1979 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1986 ring->queue = queue;
1988 ring->ring_obj = NULL;
1989 ring->use_doorbell = true;
1990 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
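/* each ring owns one GFX8_MEC_HPD_SIZE slice of the HPD/EOP buffer
 * allocated in gfx_v8_0_mec_init()
 */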
1991 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1992 + (ring_id * GFX8_MEC_HPD_SIZE);
1993 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1995 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1996 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1999 /* type-2 packets are deprecated on MEC, use type-3 instead */
2000 r = amdgpu_ring_init(adev, ring, 1024,
2001 &adev->gfx.eop_irq, irq_type);
2009 static int gfx_v8_0_sw_init(void *handle)
2011 int i, j, k, r, ring_id;
2012 struct amdgpu_ring *ring;
2013 struct amdgpu_kiq *kiq;
2014 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2016 switch (adev->asic_type) {
2020 case CHIP_POLARIS10:
2021 case CHIP_POLARIS11:
2022 case CHIP_POLARIS12:
2024 adev->gfx.mec.num_mec = 2;
2029 adev->gfx.mec.num_mec = 1;
2033 adev->gfx.mec.num_pipe_per_mec = 4;
2034 adev->gfx.mec.num_queue_per_pipe = 8;
2037 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2042 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2046 /* Privileged reg */
2047 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2048 &adev->gfx.priv_reg_irq);
2052 /* Privileged inst */
2053 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2054 &adev->gfx.priv_inst_irq);
2058 /* Add CP EDC/ECC irq */
2059 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197,
2060 &adev->gfx.cp_ecc_error_irq);
2064 /* SQ interrupts. */
2065 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
2068 DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
2072 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2074 gfx_v8_0_scratch_init(adev);
2076 r = gfx_v8_0_init_microcode(adev);
2078 DRM_ERROR("Failed to load gfx firmware!\n");
2082 r = gfx_v8_0_rlc_init(adev);
2084 DRM_ERROR("Failed to init rlc BOs!\n");
2088 r = gfx_v8_0_mec_init(adev);
2090 DRM_ERROR("Failed to init MEC BOs!\n");
2094 /* set up the gfx ring */
2095 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2096 ring = &adev->gfx.gfx_ring[i];
2097 ring->ring_obj = NULL;
2098 sprintf(ring->name, "gfx");
2099 /* no gfx doorbells on Iceland (Topaz) */
2100 if (adev->asic_type != CHIP_TOPAZ) {
2101 ring->use_doorbell = true;
2102 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2105 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2106 AMDGPU_CP_IRQ_GFX_EOP);
2112 /* set up the compute queues - allocate horizontally across pipes */
2114 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2115 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2116 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2117 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2120 r = gfx_v8_0_compute_ring_init(adev,
2131 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2133 DRM_ERROR("Failed to init KIQ BOs!\n");
2137 kiq = &adev->gfx.kiq;
2138 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2142 /* create MQDs for all compute queues, as well as the KIQ for the SR-IOV case */
2143 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2147 /* reserve GDS, GWS and OA resources for gfx */
2148 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2149 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2150 &adev->gds.gds_gfx_bo, NULL, NULL);
2154 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2155 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2156 &adev->gds.gws_gfx_bo, NULL, NULL);
2160 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2161 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2162 &adev->gds.oa_gfx_bo, NULL, NULL);
2166 adev->gfx.ce_ram_size = 0x8000; /* 32 KB of constant engine (CE) RAM */
2168 r = gfx_v8_0_gpu_early_init(adev);
2175 static int gfx_v8_0_sw_fini(void *handle)
2178 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2180 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2181 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2182 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2184 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2185 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2186 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2187 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2189 amdgpu_gfx_compute_mqd_sw_fini(adev);
2190 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2191 amdgpu_gfx_kiq_fini(adev);
2193 gfx_v8_0_mec_fini(adev);
2194 gfx_v8_0_rlc_fini(adev);
2195 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2196 &adev->gfx.rlc.clear_state_gpu_addr,
2197 (void **)&adev->gfx.rlc.cs_ptr);
2198 if ((adev->asic_type == CHIP_CARRIZO) ||
2199 (adev->asic_type == CHIP_STONEY)) {
2200 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2201 &adev->gfx.rlc.cp_table_gpu_addr,
2202 (void **)&adev->gfx.rlc.cp_table_ptr);
2204 gfx_v8_0_free_microcode(adev);
2209 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2211 uint32_t *modearray, *mod2array;
2212 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2213 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2216 modearray = adev->gfx.config.tile_mode_array;
2217 mod2array = adev->gfx.config.macrotile_mode_array;
2219 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2220 modearray[reg_offset] = 0;
2222 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2223 mod2array[reg_offset] = 0;
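/* per-ASIC golden tiling tables, written to the GB_TILE_MODE0+ and
 * GB_MACROTILE_MODE0+ register ranges at the end of each case
 */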
2225 switch (adev->asic_type) {
2227 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228 PIPE_CONFIG(ADDR_SURF_P2) |
2229 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2230 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232 PIPE_CONFIG(ADDR_SURF_P2) |
2233 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2234 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236 PIPE_CONFIG(ADDR_SURF_P2) |
2237 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2238 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240 PIPE_CONFIG(ADDR_SURF_P2) |
2241 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2242 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2244 PIPE_CONFIG(ADDR_SURF_P2) |
2245 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2246 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2247 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2248 PIPE_CONFIG(ADDR_SURF_P2) |
2249 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2250 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2252 PIPE_CONFIG(ADDR_SURF_P2) |
2253 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2254 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2255 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2256 PIPE_CONFIG(ADDR_SURF_P2));
2257 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2258 PIPE_CONFIG(ADDR_SURF_P2) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262 PIPE_CONFIG(ADDR_SURF_P2) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2265 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2266 PIPE_CONFIG(ADDR_SURF_P2) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2269 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2270 PIPE_CONFIG(ADDR_SURF_P2) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274 PIPE_CONFIG(ADDR_SURF_P2) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2278 PIPE_CONFIG(ADDR_SURF_P2) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2281 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2282 PIPE_CONFIG(ADDR_SURF_P2) |
2283 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2285 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286 PIPE_CONFIG(ADDR_SURF_P2) |
2287 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2290 PIPE_CONFIG(ADDR_SURF_P2) |
2291 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2294 PIPE_CONFIG(ADDR_SURF_P2) |
2295 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2298 PIPE_CONFIG(ADDR_SURF_P2) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2302 PIPE_CONFIG(ADDR_SURF_P2) |
2303 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2306 PIPE_CONFIG(ADDR_SURF_P2) |
2307 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2310 PIPE_CONFIG(ADDR_SURF_P2) |
2311 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2314 PIPE_CONFIG(ADDR_SURF_P2) |
2315 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2317 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318 PIPE_CONFIG(ADDR_SURF_P2) |
2319 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 PIPE_CONFIG(ADDR_SURF_P2) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326 PIPE_CONFIG(ADDR_SURF_P2) |
2327 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2331 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2332 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2333 NUM_BANKS(ADDR_SURF_8_BANK));
2334 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337 NUM_BANKS(ADDR_SURF_8_BANK));
2338 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341 NUM_BANKS(ADDR_SURF_8_BANK));
2342 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2345 NUM_BANKS(ADDR_SURF_8_BANK));
2346 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349 NUM_BANKS(ADDR_SURF_8_BANK));
2350 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353 NUM_BANKS(ADDR_SURF_8_BANK));
2354 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2357 NUM_BANKS(ADDR_SURF_8_BANK));
2358 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2359 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2360 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361 NUM_BANKS(ADDR_SURF_16_BANK));
2362 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365 NUM_BANKS(ADDR_SURF_16_BANK));
2366 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2367 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2369 NUM_BANKS(ADDR_SURF_16_BANK));
2370 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2373 NUM_BANKS(ADDR_SURF_16_BANK));
2374 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377 NUM_BANKS(ADDR_SURF_16_BANK));
2378 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2381 NUM_BANKS(ADDR_SURF_16_BANK));
2382 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385 NUM_BANKS(ADDR_SURF_8_BANK));
2387 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2388 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2390 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2392 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2393 if (reg_offset != 7)
2394 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2399 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2429 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2431 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2433 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2438 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2441 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2442 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2445 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2446 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2449 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2450 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2451 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2458 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2462 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2465 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2466 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2469 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2470 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2474 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2478 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2482 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2483 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2486 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2487 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2490 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2491 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2492 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2493 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2494 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2495 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2496 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2497 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2498 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2500 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2501 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2502 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2505 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2506 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2511 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2512 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2516 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2517 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2520 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2522 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525 NUM_BANKS(ADDR_SURF_8_BANK));
2526 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 NUM_BANKS(ADDR_SURF_8_BANK));
2530 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2533 NUM_BANKS(ADDR_SURF_8_BANK));
2534 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2537 NUM_BANKS(ADDR_SURF_8_BANK));
2538 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541 NUM_BANKS(ADDR_SURF_8_BANK));
2542 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2544 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2545 NUM_BANKS(ADDR_SURF_8_BANK));
2546 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2548 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2549 NUM_BANKS(ADDR_SURF_8_BANK));
2550 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2553 NUM_BANKS(ADDR_SURF_8_BANK));
2554 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2556 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557 NUM_BANKS(ADDR_SURF_8_BANK));
2558 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2561 NUM_BANKS(ADDR_SURF_8_BANK));
2562 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2565 NUM_BANKS(ADDR_SURF_8_BANK));
2566 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2568 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2569 NUM_BANKS(ADDR_SURF_8_BANK));
2570 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2573 NUM_BANKS(ADDR_SURF_8_BANK));
2574 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2577 NUM_BANKS(ADDR_SURF_4_BANK));
2579 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2580 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2582 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2583 if (reg_offset != 7)
2584 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2588 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2592 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2595 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2600 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2601 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2604 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2607 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2608 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2609 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2611 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2612 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2616 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2617 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2618 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2621 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2622 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2627 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2628 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2629 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2634 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2636 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2638 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2639 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2640 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2641 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2642 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2645 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2647 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2651 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2654 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2657 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2658 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2659 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2660 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2661 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2662 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2663 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2664 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2666 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2667 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2670 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2671 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2674 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2675 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2678 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2679 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2682 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2683 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2686 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2687 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2690 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2691 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2698 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2702 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2703 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2706 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2711 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2712 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2713 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2714 NUM_BANKS(ADDR_SURF_16_BANK));
2715 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718 NUM_BANKS(ADDR_SURF_16_BANK));
2719 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2721 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2722 NUM_BANKS(ADDR_SURF_16_BANK));
2723 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726 NUM_BANKS(ADDR_SURF_16_BANK));
2727 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2729 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730 NUM_BANKS(ADDR_SURF_16_BANK));
2731 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2734 NUM_BANKS(ADDR_SURF_16_BANK));
2735 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2737 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2738 NUM_BANKS(ADDR_SURF_16_BANK));
2739 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2741 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742 NUM_BANKS(ADDR_SURF_16_BANK));
2743 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 NUM_BANKS(ADDR_SURF_16_BANK));
2747 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2749 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2750 NUM_BANKS(ADDR_SURF_16_BANK));
2751 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754 NUM_BANKS(ADDR_SURF_16_BANK));
2755 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2758 NUM_BANKS(ADDR_SURF_8_BANK));
2759 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762 NUM_BANKS(ADDR_SURF_4_BANK));
2763 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2766 NUM_BANKS(ADDR_SURF_4_BANK));
2768 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2769 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2771 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2772 if (reg_offset != 7)
2773 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2776 case CHIP_POLARIS11:
2777 case CHIP_POLARIS12:
2778 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2779 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2781 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2782 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2783 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2785 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2786 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2789 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2790 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2793 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2794 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2798 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2799 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2801 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2802 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2803 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2806 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2811 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2812 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2813 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2816 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2817 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2821 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2823 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2824 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2826 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2828 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2829 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2831 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2832 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2836 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2837 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2839 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2841 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2844 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2845 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2846 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2847 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2848 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2849 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2851 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2852 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2853 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2855 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2856 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2857 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2858 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2860 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2861 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2863 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2864 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2865 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2868 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2869 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2872 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2873 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2875 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2876 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2877 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2880 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2881 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2884 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2885 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2888 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2893 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2896 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2901 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2902 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2903 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2904 NUM_BANKS(ADDR_SURF_16_BANK));
2906 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2909 NUM_BANKS(ADDR_SURF_16_BANK));
2911 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2912 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2914 NUM_BANKS(ADDR_SURF_16_BANK));
2916 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2919 NUM_BANKS(ADDR_SURF_16_BANK));
2921 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2924 NUM_BANKS(ADDR_SURF_16_BANK));
2926 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2929 NUM_BANKS(ADDR_SURF_16_BANK));
2931 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2934 NUM_BANKS(ADDR_SURF_16_BANK));
2936 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2937 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2938 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2939 NUM_BANKS(ADDR_SURF_16_BANK));
2941 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2942 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2943 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2944 NUM_BANKS(ADDR_SURF_16_BANK));
2946 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2949 NUM_BANKS(ADDR_SURF_16_BANK));
2951 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954 NUM_BANKS(ADDR_SURF_16_BANK));
2956 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2959 NUM_BANKS(ADDR_SURF_16_BANK));
2961 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2962 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2963 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2964 NUM_BANKS(ADDR_SURF_8_BANK));
2966 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2967 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2968 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2969 NUM_BANKS(ADDR_SURF_4_BANK));
2971 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2972 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2974 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2975 if (reg_offset != 7)
2976 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);

	case CHIP_CARRIZO:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}

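/*
 * gfx_v8_0_select_se_sh() programs GRBM_GFX_INDEX so that subsequent
 * register accesses target a single shader engine (SE), shader array
 * (SH) and instance, or broadcast to all of them when 0xffffffff is
 * passed for a field.  Callers hold adev->grbm_idx_mutex and restore
 * broadcast mode when done; a sketch of the usual pattern (see
 * gfx_v8_0_setup_rb() below for a real user):
 *
 *	mutex_lock(&adev->grbm_idx_mutex);
 *	gfx_v8_0_select_se_sh(adev, se, sh, 0xffffffff);
 *	...per-SE/SH register reads/writes...
 *	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&adev->grbm_idx_mutex);
 */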
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
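/*
 * gfx_v8_0_raster_config() fills in the default PA_SC_RASTER_CONFIG/
 * PA_SC_RASTER_CONFIG_1 values (RB/PKR/SE mapping) for each ASIC; the
 * harvested-RB path below adjusts these defaults when some render
 * backends are disabled.
 */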
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
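/*
 * On harvested parts some render backends (RBs) are fused off, so the
 * default raster config could route work to disabled RBs.  This helper
 * rewrites the SE_PAIR/SE/PKR/RB map fields per shader engine based on
 * the active-RB mask and commits the result with a per-SE
 * GRBM_GFX_INDEX selection.
 */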
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
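/*
 * gfx_v8_0_setup_rb() walks every SE/SH, accumulates the active-RB
 * bitmap reported by the harvest registers, then either writes the
 * default raster config (nothing effectively harvested) or falls back
 * to the harvested path above.  The raw per-SE/SH register values are
 * cached in adev->gfx.config.rb_config[][] so userspace queries can be
 * answered later without touching GRBM_GFX_INDEX again.
 */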
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM registers for the compute vmids.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}
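/*
 * Poll the RLC serdes busy registers until the CU masters on every
 * SE/SH and the non-CU masters (SE/GC/TC0/TC1) all report idle; each
 * poll loop is bounded by adev->usec_timeout.
 */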
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}
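/*
 * The RLC register list format blob is a sequence of entries delimited
 * by 0xFFFFFFFF markers; entries past RLC_FormatDirectRegListLength are
 * indirect and reference registers by index.  The parser below records
 * where each indirect run starts and builds a table of unique register
 * indices, rewriting the blob in place to refer to that table.
 */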
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++)
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
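/*
 * Program the RLC save/restore machine: upload the direct restore list
 * into SRM ARAM, the (parsed) indirect list and its metadata into GPM
 * scratch, and latch each unique register index into the
 * RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs.
 */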
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	u32 i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12) ||
		   (adev->asic_type == CHIP_VEGAM)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on Carrizo the CP interrupt is only enabled after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
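/*
 * Legacy (direct) RLC microcode load: the ucode words are streamed
 * through the RLC_GPM_UCODE_ADDR/DATA register pair, and ADDR is
 * finally set to the firmware version, mirroring how the CP PFP/CE/ME
 * and MEC loaders further below work.
 */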
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	u32 tmp;
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
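/*
 * gfx_v8_0_get_csb_size() mirrors the packet stream emitted by
 * gfx_v8_0_cp_gfx_start() below: 2 dwords to begin the clear state,
 * 3 for context control, 2 + reg_count per SECT_CONTEXT extent, 4 for
 * the raster config pair, plus the trailing end-clear-state and
 * clear-state packets.  Keep the two in sync when changing either.
 */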
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER,
			    AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
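/*
 * Bring up the compute queues through the KIQ: a SET_RESOURCES packet
 * hands the queue mask to the HW scheduler, one MAP_QUEUES packet per
 * compute ring registers its MQD and wptr addresses, and a final write
 * of 0xDEADBEEF to a scratch register is polled to confirm the KIQ
 * processed the whole stream.
 */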
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
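/*
 * Request an HQD dequeue (the 'req' argument selects the dequeue type
 * written to CP_HQD_DEQUEUE_REQUEST) and wait for the queue to go
 * inactive before clearing the request and the ring pointers.
 */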
4673 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4674 {
4675 int i, r = 0;
4677 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4678 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4679 for (i = 0; i < adev->usec_timeout; i++) {
4680 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4681 break;
4682 udelay(1);
4683 }
4684 if (i == adev->usec_timeout)
4685 r = -ETIMEDOUT;
4686 }
4687 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4688 WREG32(mmCP_HQD_PQ_RPTR, 0);
4689 WREG32(mmCP_HQD_PQ_WPTR, 0);
4691 return r;
4692 }
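/* The MQD (Memory Queue Descriptor) mirrors the CP_HQD_* register block in
 * memory. gfx_v8_0_mqd_init() only fills in the descriptor on the CPU; the
 * values take effect when they are committed to the HQD registers (directly
 * via gfx_v8_0_mqd_commit() or by the CP through MAP_QUEUES).
 */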
4694 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4695 {
4696 struct amdgpu_device *adev = ring->adev;
4697 struct vi_mqd *mqd = ring->mqd_ptr;
4698 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4699 uint32_t tmp;
4701 mqd->header = 0xC0310800;
4702 mqd->compute_pipelinestat_enable = 0x00000001;
4703 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4704 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4705 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4706 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4707 mqd->compute_misc_reserved = 0x00000003;
4708 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4709 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4710 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4711 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4712 eop_base_addr = ring->eop_gpu_addr >> 8;
4713 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4714 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4716 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4717 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4718 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4719 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4721 mqd->cp_hqd_eop_control = tmp;
4723 /* enable doorbell? */
4724 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4725 CP_HQD_PQ_DOORBELL_CONTROL,
4726 DOORBELL_EN,
4727 ring->use_doorbell ? 1 : 0);
4729 mqd->cp_hqd_pq_doorbell_control = tmp;
4731 /* set the pointer to the MQD */
4732 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4733 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4735 /* set MQD vmid to 0 */
4736 tmp = RREG32(mmCP_MQD_CONTROL);
4737 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4738 mqd->cp_mqd_control = tmp;
4740 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4741 hqd_gpu_addr = ring->gpu_addr >> 8;
4742 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4743 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4745 /* set up the HQD, this is similar to CP_RB0_CNTL */
4746 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4747 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4748 (order_base_2(ring->ring_size / 4) - 1));
4749 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4750 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4751 #ifdef __BIG_ENDIAN
4752 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4753 #endif
4754 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4755 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4756 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4757 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4758 mqd->cp_hqd_pq_control = tmp;
4760 /* set the wb address whether it's enabled or not */
4761 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4762 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4763 mqd->cp_hqd_pq_rptr_report_addr_hi =
4764 upper_32_bits(wb_gpu_addr) & 0xffff;
4766 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4767 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4768 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4769 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4771 tmp = 0;
4772 /* enable the doorbell if requested */
4773 if (ring->use_doorbell) {
4774 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4775 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4776 DOORBELL_OFFSET, ring->doorbell_index);
4778 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4779 DOORBELL_EN, 1);
4780 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4781 DOORBELL_SOURCE, 0);
4782 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4783 DOORBELL_HIT, 0);
4784 }
4786 mqd->cp_hqd_pq_doorbell_control = tmp;
4788 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4789 ring->wptr = 0;
4790 mqd->cp_hqd_pq_wptr = ring->wptr;
4791 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4793 /* set the vmid for the queue */
4794 mqd->cp_hqd_vmid = 0;
4796 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4797 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4798 mqd->cp_hqd_persistent_state = tmp;
4801 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4802 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4803 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4804 mqd->cp_hqd_ib_control = tmp;
4806 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4807 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4808 mqd->cp_hqd_iq_timer = tmp;
4810 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4811 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4812 mqd->cp_hqd_ctx_save_control = tmp;
4815 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4816 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4817 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4818 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4819 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4820 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4821 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4822 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4823 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4824 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4825 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4826 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4827 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4828 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4829 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4831 /* activate the queue */
4832 mqd->cp_hqd_active = 1;
4834 return 0;
4835 }
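/* Commit a prepared MQD to the currently selected HQD. Callers are expected
 * to hold srbm_mutex and to have selected the target me/pipe/queue with
 * vi_srbm_select() before calling this.
 */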
4837 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4838 struct vi_mqd *mqd)
4839 {
4840 uint32_t mqd_reg;
4841 uint32_t *mqd_data;
4843 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4844 mqd_data = &mqd->cp_mqd_base_addr_lo;
4846 /* disable wptr polling */
4847 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4849 /* program all HQD registers */
4850 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4851 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4853 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4854 * This is safe since EOP RPTR==WPTR for any inactive HQD
4855 * on ASICs that do not support context-save.
4856 * EOP writes/reads can start anywhere in the ring.
4857 */
4858 if (adev->asic_type != CHIP_TONGA) {
4859 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4860 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4861 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4862 }
4864 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4865 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4867 /* activate the HQD */
4868 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4869 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4871 return 0;
4872 }
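/* KIQ init takes one of two paths: after a GPU reset the saved MQD backup is
 * restored and committed as-is; otherwise a fresh MQD is generated and a
 * backup copy is kept for later resets.
 */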
4874 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4875 {
4876 struct amdgpu_device *adev = ring->adev;
4877 struct vi_mqd *mqd = ring->mqd_ptr;
4878 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4880 gfx_v8_0_kiq_setting(ring);
4882 if (adev->in_gpu_reset) { /* for GPU_RESET case */
4883 /* reset MQD to a clean status */
4884 if (adev->gfx.mec.mqd_backup[mqd_idx])
4885 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4887 /* reset ring buffer */
4888 ring->wptr = 0;
4889 amdgpu_ring_clear_ring(ring);
4890 mutex_lock(&adev->srbm_mutex);
4891 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4892 gfx_v8_0_mqd_commit(adev, mqd);
4893 vi_srbm_select(adev, 0, 0, 0, 0);
4894 mutex_unlock(&adev->srbm_mutex);
4895 } else {
4896 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4897 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4898 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4899 mutex_lock(&adev->srbm_mutex);
4900 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4901 gfx_v8_0_mqd_init(ring);
4902 gfx_v8_0_mqd_commit(adev, mqd);
4903 vi_srbm_select(adev, 0, 0, 0, 0);
4904 mutex_unlock(&adev->srbm_mutex);
4906 if (adev->gfx.mec.mqd_backup[mqd_idx])
4907 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4908 }
4910 return 0;
4911 }
4913 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4914 {
4915 struct amdgpu_device *adev = ring->adev;
4916 struct vi_mqd *mqd = ring->mqd_ptr;
4917 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4919 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4920 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4921 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4922 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4923 mutex_lock(&adev->srbm_mutex);
4924 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4925 gfx_v8_0_mqd_init(ring);
4926 vi_srbm_select(adev, 0, 0, 0, 0);
4927 mutex_unlock(&adev->srbm_mutex);
4929 if (adev->gfx.mec.mqd_backup[mqd_idx])
4930 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4931 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4932 /* reset MQD to a clean status */
4933 if (adev->gfx.mec.mqd_backup[mqd_idx])
4934 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4935 /* reset ring buffer */
4936 ring->wptr = 0;
4937 amdgpu_ring_clear_ring(ring);
4938 } else {
4939 amdgpu_ring_clear_ring(ring);
4940 }
4942 return 0;
4943 }
4944 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4945 {
4946 if (adev->asic_type > CHIP_TONGA) {
4947 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4948 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4949 }
4950 /* enable doorbells */
4951 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4952 }
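/* Resume order matters here: the KIQ is brought up first with direct
 * register writes, then the user compute queues (KCQs) are mapped through
 * KIQ packets and each ring is tested before being marked ready.
 */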
4954 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4955 {
4956 struct amdgpu_ring *ring = NULL;
4957 int r = 0, i;
4959 gfx_v8_0_cp_compute_enable(adev, true);
4961 ring = &adev->gfx.kiq.ring;
4963 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4964 if (unlikely(r != 0))
4965 goto done;
4967 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4968 if (!r) {
4969 r = gfx_v8_0_kiq_init_queue(ring);
4970 amdgpu_bo_kunmap(ring->mqd_obj);
4971 ring->mqd_ptr = NULL;
4972 }
4973 amdgpu_bo_unreserve(ring->mqd_obj);
4974 if (r)
4975 goto done;
4977 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4978 ring = &adev->gfx.compute_ring[i];
4980 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4981 if (unlikely(r != 0))
4982 goto done;
4983 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4984 if (!r) {
4985 r = gfx_v8_0_kcq_init_queue(ring);
4986 amdgpu_bo_kunmap(ring->mqd_obj);
4987 ring->mqd_ptr = NULL;
4988 }
4989 amdgpu_bo_unreserve(ring->mqd_obj);
4990 if (r)
4991 goto done;
4992 }
4994 gfx_v8_0_set_mec_doorbell_range(adev);
4996 r = gfx_v8_0_kiq_kcq_enable(adev);
4997 if (r)
4998 goto done;
5000 /* Test KIQ ring */
5001 ring = &adev->gfx.kiq.ring;
5002 ring->ready = true;
5003 r = amdgpu_ring_test_ring(ring);
5004 if (r) {
5005 ring->ready = false;
5006 goto done;
5007 }
5009 /* Test KCQs */
5010 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5011 ring = &adev->gfx.compute_ring[i];
5012 ring->ready = true;
5013 r = amdgpu_ring_test_ring(ring);
5014 if (r)
5015 ring->ready = false;
5016 }
5018 done:
5019 return r;
5020 }
5022 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5023 {
5024 int r;
5026 if (!(adev->flags & AMD_IS_APU))
5027 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5029 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5030 /* legacy firmware loading */
5031 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5032 if (r)
5033 return r;
5035 r = gfx_v8_0_cp_compute_load_microcode(adev);
5036 if (r)
5037 return r;
5038 }
5040 r = gfx_v8_0_cp_gfx_resume(adev);
5041 if (r)
5042 return r;
5044 r = gfx_v8_0_kiq_resume(adev);
5045 if (r)
5046 return r;
5048 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5050 return 0;
5051 }
5053 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5054 {
5055 gfx_v8_0_cp_gfx_enable(adev, enable);
5056 gfx_v8_0_cp_compute_enable(adev, enable);
5057 }
5059 static int gfx_v8_0_hw_init(void *handle)
5060 {
5061 int r;
5062 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5064 gfx_v8_0_init_golden_registers(adev);
5065 gfx_v8_0_gpu_init(adev);
5067 r = gfx_v8_0_rlc_resume(adev);
5068 if (r)
5069 return r;
5071 r = gfx_v8_0_cp_resume(adev);
5073 return r;
5074 }
5076 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
5077 {
5078 struct amdgpu_device *adev = kiq_ring->adev;
5079 uint32_t scratch, tmp = 0;
5080 int r, i;
5082 r = amdgpu_gfx_scratch_get(adev, &scratch);
5083 if (r) {
5084 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5085 return r;
5086 }
5087 WREG32(scratch, 0xCAFEDEAD);
5089 r = amdgpu_ring_alloc(kiq_ring, 10);
5090 if (r) {
5091 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5092 amdgpu_gfx_scratch_free(adev, scratch);
5093 return r;
5094 }
5096 /* unmap queues */
5097 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5098 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5099 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5100 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5101 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5102 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5103 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5104 amdgpu_ring_write(kiq_ring, 0);
5105 amdgpu_ring_write(kiq_ring, 0);
5106 amdgpu_ring_write(kiq_ring, 0);
5107 /* write to scratch for completion */
5108 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5109 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5110 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5111 amdgpu_ring_commit(kiq_ring);
5113 for (i = 0; i < adev->usec_timeout; i++) {
5114 tmp = RREG32(scratch);
5115 if (tmp == 0xDEADBEEF)
5116 break;
5117 DRM_UDELAY(1);
5118 }
5119 if (i >= adev->usec_timeout) {
5120 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5121 r = -EINVAL;
5122 }
5123 amdgpu_gfx_scratch_free(adev, scratch);
5125 return r;
5126 }
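/* Teardown mirrors bring-up in reverse: the KCQs are unmapped through the
 * KIQ while the CP is still running, and only then are the CP and RLC
 * halted.
 */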
5127 static int gfx_v8_0_hw_fini(void *handle)
5128 {
5129 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5130 int i;
5132 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5133 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5135 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
5137 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
5139 /* disable KCQ to avoid CPC touch memory not valid anymore */
5140 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5141 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5143 if (amdgpu_sriov_vf(adev)) {
5144 pr_debug("For SRIOV client, shouldn't do anything.\n");
5145 return 0;
5146 }
5147 gfx_v8_0_cp_enable(adev, false);
5148 gfx_v8_0_rlc_stop(adev);
5150 amdgpu_device_ip_set_powergating_state(adev,
5151 AMD_IP_BLOCK_TYPE_GFX,
5152 AMD_PG_STATE_UNGATE);
5154 return 0;
5155 }
5157 static int gfx_v8_0_suspend(void *handle)
5158 {
5159 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5160 adev->gfx.in_suspend = true;
5161 return gfx_v8_0_hw_fini(adev);
5162 }
5164 static int gfx_v8_0_resume(void *handle)
5165 {
5166 int r;
5167 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5169 r = gfx_v8_0_hw_init(adev);
5170 adev->gfx.in_suspend = false;
5172 return r;
5173 }
5174 static bool gfx_v8_0_is_idle(void *handle)
5175 {
5176 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5178 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5179 return false;
5180 else
5181 return true;
5182 }
5184 static int gfx_v8_0_wait_for_idle(void *handle)
5185 {
5186 unsigned i;
5187 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5189 for (i = 0; i < adev->usec_timeout; i++) {
5190 if (gfx_v8_0_is_idle(handle))
5191 return 0;
5193 udelay(1);
5194 }
5195 return -ETIMEDOUT;
5196 }
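/* GPU soft reset is split into phases: check_soft_reset() latches which
 * blocks are hung into adev->gfx.*_soft_reset, pre_soft_reset() quiesces
 * them, soft_reset() pulses the GRBM/SRBM reset bits, and post_soft_reset()
 * brings the rings back up.
 */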
5198 static bool gfx_v8_0_check_soft_reset(void *handle)
5199 {
5200 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5201 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5202 u32 tmp;
5204 /* GRBM_STATUS */
5205 tmp = RREG32(mmGRBM_STATUS);
5206 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5207 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5208 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5209 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5210 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5211 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5212 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5213 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5214 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5215 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5216 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5217 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5218 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5219 }
5221 /* GRBM_STATUS2 */
5222 tmp = RREG32(mmGRBM_STATUS2);
5223 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5224 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5225 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5227 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5228 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5229 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5230 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5231 SOFT_RESET_CPF, 1);
5232 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5233 SOFT_RESET_CPC, 1);
5234 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5235 SOFT_RESET_CPG, 1);
5236 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5237 SOFT_RESET_GRBM, 1);
5238 }
5240 /* SRBM_STATUS */
5241 tmp = RREG32(mmSRBM_STATUS);
5242 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5243 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5244 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5245 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5246 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5247 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5249 if (grbm_soft_reset || srbm_soft_reset) {
5250 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5251 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5252 return true;
5253 } else {
5254 adev->gfx.grbm_soft_reset = 0;
5255 adev->gfx.srbm_soft_reset = 0;
5256 return false;
5257 }
5258 }
5260 static int gfx_v8_0_pre_soft_reset(void *handle)
5261 {
5262 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5263 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5265 if ((!adev->gfx.grbm_soft_reset) &&
5266 (!adev->gfx.srbm_soft_reset))
5267 return 0;
5269 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5270 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5272 /* stop the rlc */
5273 gfx_v8_0_rlc_stop(adev);
5275 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5276 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5277 /* Disable GFX parsing/prefetching */
5278 gfx_v8_0_cp_gfx_enable(adev, false);
5280 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5281 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5282 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5283 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5284 int i;
5286 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5287 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5289 mutex_lock(&adev->srbm_mutex);
5290 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5291 gfx_v8_0_deactivate_hqd(adev, 2);
5292 vi_srbm_select(adev, 0, 0, 0, 0);
5293 mutex_unlock(&adev->srbm_mutex);
5294 }
5295 /* Disable MEC parsing/prefetching */
5296 gfx_v8_0_cp_compute_enable(adev, false);
5297 }
5299 return 0;
5300 }
5302 static int gfx_v8_0_soft_reset(void *handle)
5303 {
5304 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5305 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5306 u32 tmp;
5308 if ((!adev->gfx.grbm_soft_reset) &&
5309 (!adev->gfx.srbm_soft_reset))
5310 return 0;
5312 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5313 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5315 if (grbm_soft_reset || srbm_soft_reset) {
5316 tmp = RREG32(mmGMCON_DEBUG);
5317 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5318 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5319 WREG32(mmGMCON_DEBUG, tmp);
5320 udelay(50);
5321 }
5323 if (grbm_soft_reset) {
5324 tmp = RREG32(mmGRBM_SOFT_RESET);
5325 tmp |= grbm_soft_reset;
5326 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5327 WREG32(mmGRBM_SOFT_RESET, tmp);
5328 tmp = RREG32(mmGRBM_SOFT_RESET);
5330 udelay(50);
5332 tmp &= ~grbm_soft_reset;
5333 WREG32(mmGRBM_SOFT_RESET, tmp);
5334 tmp = RREG32(mmGRBM_SOFT_RESET);
5335 }
5337 if (srbm_soft_reset) {
5338 tmp = RREG32(mmSRBM_SOFT_RESET);
5339 tmp |= srbm_soft_reset;
5340 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5341 WREG32(mmSRBM_SOFT_RESET, tmp);
5342 tmp = RREG32(mmSRBM_SOFT_RESET);
5344 udelay(50);
5346 tmp &= ~srbm_soft_reset;
5347 WREG32(mmSRBM_SOFT_RESET, tmp);
5348 tmp = RREG32(mmSRBM_SOFT_RESET);
5349 }
5351 if (grbm_soft_reset || srbm_soft_reset) {
5352 tmp = RREG32(mmGMCON_DEBUG);
5353 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5354 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5355 WREG32(mmGMCON_DEBUG, tmp);
5356 }
5358 /* Wait a little for things to settle down */
5359 udelay(50);
5361 return 0;
5362 }
5364 static int gfx_v8_0_post_soft_reset(void *handle)
5365 {
5366 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5367 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5369 if ((!adev->gfx.grbm_soft_reset) &&
5370 (!adev->gfx.srbm_soft_reset))
5371 return 0;
5373 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5374 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5376 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5377 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5378 gfx_v8_0_cp_gfx_resume(adev);
5380 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5381 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5382 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5383 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5384 int i;
5386 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5387 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5389 mutex_lock(&adev->srbm_mutex);
5390 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5391 gfx_v8_0_deactivate_hqd(adev, 2);
5392 vi_srbm_select(adev, 0, 0, 0, 0);
5393 mutex_unlock(&adev->srbm_mutex);
5394 }
5395 gfx_v8_0_kiq_resume(adev);
5396 }
5397 gfx_v8_0_rlc_start(adev);
5399 return 0;
5400 }
5402 /**
5403 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5404 *
5405 * @adev: amdgpu_device pointer
5406 *
5407 * Fetches a GPU clock counter snapshot.
5408 * Returns the 64 bit clock counter snapshot.
5409 */
5410 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5411 {
5412 uint64_t clock;
5414 mutex_lock(&adev->gfx.gpu_clock_mutex);
5415 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5416 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5417 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5418 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5419 return clock;
5420 }
5422 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5423 uint32_t vmid,
5424 uint32_t gds_base, uint32_t gds_size,
5425 uint32_t gws_base, uint32_t gws_size,
5426 uint32_t oa_base, uint32_t oa_size)
5427 {
5428 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5429 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5431 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5432 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5434 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5435 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5437 /* GDS Base */
5438 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5439 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5440 WRITE_DATA_DST_SEL(0)));
5441 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5442 amdgpu_ring_write(ring, 0);
5443 amdgpu_ring_write(ring, gds_base);
5445 /* GDS Size */
5446 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5447 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5448 WRITE_DATA_DST_SEL(0)));
5449 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5450 amdgpu_ring_write(ring, 0);
5451 amdgpu_ring_write(ring, gds_size);
5453 /* GWS */
5454 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5455 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5456 WRITE_DATA_DST_SEL(0)));
5457 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5458 amdgpu_ring_write(ring, 0);
5459 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5461 /* OA */
5462 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5463 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5464 WRITE_DATA_DST_SEL(0)));
5465 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5466 amdgpu_ring_write(ring, 0);
5467 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5468 }
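/* Wave debug state is read through the SQ indirect register interface:
 * SQ_IND_INDEX selects a (simd, wave, address) tuple and SQ_IND_DATA returns
 * the value; the AUTO_INCR bit allows streaming consecutive registers.
 */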
5470 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5471 {
5472 WREG32(mmSQ_IND_INDEX,
5473 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5474 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5475 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5476 (SQ_IND_INDEX__FORCE_READ_MASK));
5477 return RREG32(mmSQ_IND_DATA);
5478 }
5480 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5481 uint32_t wave, uint32_t thread,
5482 uint32_t regno, uint32_t num, uint32_t *out)
5483 {
5484 WREG32(mmSQ_IND_INDEX,
5485 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5486 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5487 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5488 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5489 (SQ_IND_INDEX__FORCE_READ_MASK) |
5490 (SQ_IND_INDEX__AUTO_INCR_MASK));
5491 while (num--)
5492 *(out++) = RREG32(mmSQ_IND_DATA);
5493 }
5495 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5496 {
5497 /* type 0 wave data */
5498 dst[(*no_fields)++] = 0;
5499 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5500 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5501 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5502 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5503 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5504 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5505 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5506 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5507 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5508 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5509 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5510 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5511 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5512 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5513 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5514 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5515 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5516 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5517 }
5519 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5520 uint32_t wave, uint32_t start,
5521 uint32_t size, uint32_t *dst)
5522 {
5523 wave_read_regs(
5524 adev, simd, wave, 0,
5525 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5526 }
5529 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5530 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5531 .select_se_sh = &gfx_v8_0_select_se_sh,
5532 .read_wave_data = &gfx_v8_0_read_wave_data,
5533 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5534 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5535 };
5537 static int gfx_v8_0_early_init(void *handle)
5538 {
5539 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5541 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5542 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5543 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5544 gfx_v8_0_set_ring_funcs(adev);
5545 gfx_v8_0_set_irq_funcs(adev);
5546 gfx_v8_0_set_gds_init(adev);
5547 gfx_v8_0_set_rlc_funcs(adev);
5549 return 0;
5550 }
5552 static int gfx_v8_0_late_init(void *handle)
5553 {
5554 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5555 int r;
5557 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5558 if (r)
5559 return r;
5561 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5562 if (r)
5563 return r;
5565 /* requires IBs so do in late init after IB pool is initialized */
5566 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5567 if (r)
5568 return r;
5570 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5571 if (r) {
5572 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5573 return r;
5574 }
5576 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5577 if (r) {
5578 DRM_ERROR(
5579 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5580 r);
5581 return r;
5582 }
5584 amdgpu_device_ip_set_powergating_state(adev,
5585 AMD_IP_BLOCK_TYPE_GFX,
5586 AMD_PG_STATE_GATE);
5588 return 0;
5589 }
5591 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5592 bool enable)
5593 {
5594 if ((adev->asic_type == CHIP_POLARIS11) ||
5595 (adev->asic_type == CHIP_POLARIS12) ||
5596 (adev->asic_type == CHIP_VEGAM))
5597 /* Send msg to SMU via Powerplay */
5598 amdgpu_device_ip_set_powergating_state(adev,
5599 AMD_IP_BLOCK_TYPE_SMC,
5600 enable ?
5601 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5602 else
5603 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5604 }
5606 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5607 bool enable)
5608 {
5609 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5610 }
5612 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5613 bool enable)
5614 {
5615 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5616 }
5618 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5619 bool enable)
5620 {
5621 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5622 }
5624 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5625 bool enable)
5626 {
5627 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5629 /* Read any GFX register to wake up GFX. */
5630 if (enable)
5631 RREG32(mmDB_RENDER_CONTROL);
5632 }
5634 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5635 bool enable)
5636 {
5637 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5638 cz_enable_gfx_cg_power_gating(adev, true);
5639 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5640 cz_enable_gfx_pipeline_power_gating(adev, true);
5641 } else {
5642 cz_enable_gfx_cg_power_gating(adev, false);
5643 cz_enable_gfx_pipeline_power_gating(adev, false);
5644 }
5645 }
5647 static int gfx_v8_0_set_powergating_state(void *handle,
5648 enum amd_powergating_state state)
5649 {
5650 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5651 bool enable = (state == AMD_PG_STATE_GATE);
5653 if (amdgpu_sriov_vf(adev))
5654 return 0;
5656 switch (adev->asic_type) {
5657 case CHIP_CARRIZO:
5658 case CHIP_STONEY:
5660 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5661 cz_enable_sck_slow_down_on_power_up(adev, true);
5662 cz_enable_sck_slow_down_on_power_down(adev, true);
5663 } else {
5664 cz_enable_sck_slow_down_on_power_up(adev, false);
5665 cz_enable_sck_slow_down_on_power_down(adev, false);
5666 }
5667 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5668 cz_enable_cp_power_gating(adev, true);
5669 else
5670 cz_enable_cp_power_gating(adev, false);
5672 cz_update_gfx_cg_power_gating(adev, enable);
5674 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5675 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5676 else
5677 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5679 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5680 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5681 else
5682 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5683 break;
5684 case CHIP_POLARIS11:
5685 case CHIP_POLARIS12:
5686 case CHIP_VEGAM:
5687 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5688 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5689 else
5690 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5692 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5693 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5694 else
5695 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5697 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5698 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5699 else
5700 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5701 break;
5702 default:
5703 break;
5704 }
5706 return 0;
5707 }
5709 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5710 {
5711 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5712 int data;
5714 if (amdgpu_sriov_vf(adev))
5715 *flags = 0;
5717 /* AMD_CG_SUPPORT_GFX_MGCG */
5718 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5719 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5720 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5722 /* AMD_CG_SUPPORT_GFX_CGCG */
5723 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5724 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5725 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5727 /* AMD_CG_SUPPORT_GFX_CGLS */
5728 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5729 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5731 /* AMD_CG_SUPPORT_GFX_CGTS */
5732 data = RREG32(mmCGTS_SM_CTRL_REG);
5733 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5734 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5736 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5737 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5738 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5740 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5741 data = RREG32(mmRLC_MEM_SLP_CNTL);
5742 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5743 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5745 /* AMD_CG_SUPPORT_GFX_CP_LS */
5746 data = RREG32(mmCP_MEM_SLP_CNTL);
5747 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5748 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5749 }
5751 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5752 uint32_t reg_addr, uint32_t cmd)
5753 {
5754 uint32_t data;
5756 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5758 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5759 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5761 data = RREG32(mmRLC_SERDES_WR_CTRL);
5762 if (adev->asic_type == CHIP_STONEY)
5763 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5764 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5765 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5766 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5767 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5768 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5769 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5770 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5771 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5772 else
5773 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5774 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5775 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5776 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5777 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5778 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5779 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5780 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5781 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5782 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5783 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5784 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5785 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5786 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5787 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5789 WREG32(mmRLC_SERDES_WR_CTRL, data);
5790 }
5792 #define MSG_ENTER_RLC_SAFE_MODE 1
5793 #define MSG_EXIT_RLC_SAFE_MODE 0
5794 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5795 #define RLC_GPR_REG2__REQ__SHIFT 0
5796 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5797 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
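/* RLC safe mode is a handshake with the RLC firmware: the driver requests
 * entry or exit through RLC_SAFE_MODE and polls until the CMD bit clears,
 * which guarantees the RLC is not touching the clock/power-gating registers
 * the driver is about to reprogram.
 */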
5799 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5800 {
5801 uint32_t data;
5802 unsigned i;
5804 data = RREG32(mmRLC_CNTL);
5805 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5806 return;
5808 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5809 data |= RLC_SAFE_MODE__CMD_MASK;
5810 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5811 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5812 WREG32(mmRLC_SAFE_MODE, data);
5814 for (i = 0; i < adev->usec_timeout; i++) {
5815 if ((RREG32(mmRLC_GPM_STAT) &
5816 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5817 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5818 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5819 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5820 break;
5821 udelay(1);
5822 }
5824 for (i = 0; i < adev->usec_timeout; i++) {
5825 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5826 break;
5827 udelay(1);
5828 }
5829 adev->gfx.rlc.in_safe_mode = true;
5830 }
5831 }
5833 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5834 {
5835 uint32_t data;
5836 unsigned i;
5838 data = RREG32(mmRLC_CNTL);
5839 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5840 return;
5842 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5843 if (adev->gfx.rlc.in_safe_mode) {
5844 data |= RLC_SAFE_MODE__CMD_MASK;
5845 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5846 WREG32(mmRLC_SAFE_MODE, data);
5847 adev->gfx.rlc.in_safe_mode = false;
5848 }
5849 }
5851 for (i = 0; i < adev->usec_timeout; i++) {
5852 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5853 break;
5854 udelay(1);
5855 }
5856 }
5858 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5859 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5860 .exit_safe_mode = iceland_exit_rlc_safe_mode
5861 };
5863 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5864 bool enable)
5865 {
5866 uint32_t temp, data;
5868 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5870 /* It is disabled by HW by default */
5871 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5872 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5873 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5874 /* 1 - RLC memory Light sleep */
5875 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5877 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5878 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5879 }
5881 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5882 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5883 if (adev->flags & AMD_IS_APU)
5884 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5885 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5886 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5888 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5889 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5890 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5891 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5893 if (temp != data)
5894 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5896 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5897 gfx_v8_0_wait_for_rlc_serdes(adev);
5899 /* 5 - clear mgcg override */
5900 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5902 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5903 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5904 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5905 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5906 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5907 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5908 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5909 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5910 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5911 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5912 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5913 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5914 if (temp != data)
5915 WREG32(mmCGTS_SM_CTRL_REG, data);
5916 }
5919 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5920 gfx_v8_0_wait_for_rlc_serdes(adev);
5921 } else {
5922 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5923 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5924 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5925 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5926 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5927 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5928 if (temp != data)
5929 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5931 /* 2 - disable MGLS in RLC */
5932 data = RREG32(mmRLC_MEM_SLP_CNTL);
5933 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5934 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5935 WREG32(mmRLC_MEM_SLP_CNTL, data);
5936 }
5938 /* 3 - disable MGLS in CP */
5939 data = RREG32(mmCP_MEM_SLP_CNTL);
5940 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5941 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5942 WREG32(mmCP_MEM_SLP_CNTL, data);
5943 }
5945 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5946 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5947 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5948 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5949 if (temp != data)
5950 WREG32(mmCGTS_SM_CTRL_REG, data);
5952 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5953 gfx_v8_0_wait_for_rlc_serdes(adev);
5955 /* 6 - set mgcg override */
5956 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5958 udelay(50);
5960 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5961 gfx_v8_0_wait_for_rlc_serdes(adev);
5962 }
5964 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5965 }
5967 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5968 bool enable)
5969 {
5970 uint32_t temp, temp1, data, data1;
5972 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5974 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5976 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5977 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5978 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5979 if (temp1 != data1)
5980 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5982 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5983 gfx_v8_0_wait_for_rlc_serdes(adev);
5985 /* 2 - clear cgcg override */
5986 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5988 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5989 gfx_v8_0_wait_for_rlc_serdes(adev);
5991 /* 3 - write cmd to set CGLS */
5992 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5994 /* 4 - enable cgcg */
5995 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5997 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5998 /* enable cgls */
5999 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6001 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6002 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6004 if (temp1 != data1)
6005 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6006 } else {
6007 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6008 }
6010 if (temp != data)
6011 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6013 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
6014 * Cmp_busy/GFX_Idle interrupts
6015 */
6016 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6017 } else {
6018 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6019 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6021 /* TEST CGCG */
6022 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6023 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6024 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6025 if (temp1 != data1)
6026 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6028 /* read gfx register to wake up cgcg */
6029 RREG32(mmCB_CGTT_SCLK_CTRL);
6030 RREG32(mmCB_CGTT_SCLK_CTRL);
6031 RREG32(mmCB_CGTT_SCLK_CTRL);
6032 RREG32(mmCB_CGTT_SCLK_CTRL);
6034 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6035 gfx_v8_0_wait_for_rlc_serdes(adev);
6037 /* write cmd to Set CGCG Override */
6038 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6040 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6041 gfx_v8_0_wait_for_rlc_serdes(adev);
6043 /* write cmd to Clear CGLS */
6044 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6046 /* disable cgcg, cgls should be disabled too. */
6047 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6048 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6049 if (temp != data)
6050 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6051 /* enable interrupts again for PG */
6052 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6053 }
6055 gfx_v8_0_wait_for_rlc_serdes(adev);
6057 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6058 }
6059 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6060 bool enable)
6061 {
6062 if (enable) {
6063 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6064 * === MGCG + MGLS + TS(CG/LS) ===
6065 */
6066 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6067 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6068 } else {
6069 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6070 * === CGCG + CGLS ===
6071 */
6072 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6073 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6074 }
6076 return 0;
6077 }
6078 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6079 enum amd_clockgating_state state)
6080 {
6081 uint32_t msg_id, pp_state = 0;
6082 uint32_t pp_support_state = 0;
6084 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6085 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6086 pp_support_state = PP_STATE_SUPPORT_LS;
6087 pp_state = PP_STATE_LS;
6088 }
6089 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6090 pp_support_state |= PP_STATE_SUPPORT_CG;
6091 pp_state |= PP_STATE_CG;
6092 }
6093 if (state == AMD_CG_STATE_UNGATE)
6094 pp_state = 0;
6096 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6097 PP_BLOCK_GFX_CG,
6098 pp_support_state,
6099 pp_state);
6100 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6101 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6102 }
6104 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6105 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6106 pp_support_state = PP_STATE_SUPPORT_LS;
6107 pp_state = PP_STATE_LS;
6108 }
6110 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6111 pp_support_state |= PP_STATE_SUPPORT_CG;
6112 pp_state |= PP_STATE_CG;
6113 }
6115 if (state == AMD_CG_STATE_UNGATE)
6116 pp_state = 0;
6118 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6119 PP_BLOCK_GFX_MG,
6120 pp_support_state,
6121 pp_state);
6122 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6123 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6124 }
6126 return 0;
6127 }
6129 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6130 enum amd_clockgating_state state)
6131 {
6133 uint32_t msg_id, pp_state = 0;
6134 uint32_t pp_support_state = 0;
6136 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6137 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6138 pp_support_state = PP_STATE_SUPPORT_LS;
6139 pp_state = PP_STATE_LS;
6140 }
6141 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6142 pp_support_state |= PP_STATE_SUPPORT_CG;
6143 pp_state |= PP_STATE_CG;
6144 }
6145 if (state == AMD_CG_STATE_UNGATE)
6146 pp_state = 0;
6148 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6149 PP_BLOCK_GFX_CG,
6150 pp_support_state,
6151 pp_state);
6152 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6153 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6154 }
6156 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6157 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6158 pp_support_state = PP_STATE_SUPPORT_LS;
6159 pp_state = PP_STATE_LS;
6160 }
6161 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6162 pp_support_state |= PP_STATE_SUPPORT_CG;
6163 pp_state |= PP_STATE_CG;
6164 }
6165 if (state == AMD_CG_STATE_UNGATE)
6166 pp_state = 0;
6168 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6169 PP_BLOCK_GFX_3D,
6170 pp_support_state,
6171 pp_state);
6172 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6173 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6174 }
6176 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6177 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6178 pp_support_state = PP_STATE_SUPPORT_LS;
6179 pp_state = PP_STATE_LS;
6180 }
6182 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6183 pp_support_state |= PP_STATE_SUPPORT_CG;
6184 pp_state |= PP_STATE_CG;
6185 }
6187 if (state == AMD_CG_STATE_UNGATE)
6188 pp_state = 0;
6190 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6191 PP_BLOCK_GFX_MG,
6192 pp_support_state,
6193 pp_state);
6194 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6195 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6196 }
6198 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6199 pp_support_state = PP_STATE_SUPPORT_LS;
6201 if (state == AMD_CG_STATE_UNGATE)
6202 pp_state = 0;
6203 else
6204 pp_state = PP_STATE_LS;
6206 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6207 PP_BLOCK_GFX_RLC,
6208 pp_support_state,
6209 pp_state);
6210 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6211 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6212 }
6214 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6215 pp_support_state = PP_STATE_SUPPORT_LS;
6217 if (state == AMD_CG_STATE_UNGATE)
6218 pp_state = 0;
6219 else
6220 pp_state = PP_STATE_LS;
6221 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6222 PP_BLOCK_GFX_CP,
6223 pp_support_state,
6224 pp_state);
6225 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6226 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6227 }
6229 return 0;
6230 }
6232 static int gfx_v8_0_set_clockgating_state(void *handle,
6233 enum amd_clockgating_state state)
6234 {
6235 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6237 if (amdgpu_sriov_vf(adev))
6238 return 0;
6240 switch (adev->asic_type) {
6241 case CHIP_FIJI:
6242 case CHIP_CARRIZO:
6243 case CHIP_STONEY:
6244 gfx_v8_0_update_gfx_clock_gating(adev,
6245 state == AMD_CG_STATE_GATE);
6246 break;
6247 case CHIP_TONGA:
6248 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6249 break;
6250 case CHIP_POLARIS10:
6251 case CHIP_POLARIS11:
6252 case CHIP_POLARIS12:
6253 case CHIP_VEGAM:
6254 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6255 break;
6256 default:
6257 break;
6258 }
6260 return 0;
6261 }
6262 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6263 {
6264 return ring->adev->wb.wb[ring->rptr_offs];
6265 }
6267 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6268 {
6269 struct amdgpu_device *adev = ring->adev;
6271 if (ring->use_doorbell)
6272 /* XXX check if swapping is necessary on BE */
6273 return ring->adev->wb.wb[ring->wptr_offs];
6274 else
6275 return RREG32(mmCP_RB0_WPTR);
6276 }
6278 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6279 {
6280 struct amdgpu_device *adev = ring->adev;
6282 if (ring->use_doorbell) {
6283 /* XXX check if swapping is necessary on BE */
6284 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6285 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6286 } else {
6287 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6288 (void)RREG32(mmCP_RB0_WPTR);
6289 }
6290 }
6292 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6293 {
6294 u32 ref_and_mask, reg_mem_engine;
6296 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6297 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6298 switch (ring->me) {
6299 case 1:
6300 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6301 break;
6302 case 2:
6303 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6304 break;
6305 default:
6306 return;
6307 }
6308 reg_mem_engine = 0;
6309 } else {
6310 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6311 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6312 }
6314 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6315 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6316 WAIT_REG_MEM_FUNCTION(3) | /* == */
6317 reg_mem_engine));
6318 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6319 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6320 amdgpu_ring_write(ring, ref_and_mask);
6321 amdgpu_ring_write(ring, ref_and_mask);
6322 amdgpu_ring_write(ring, 0x20); /* poll interval */
6323 }
6325 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6326 {
6327 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6328 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6329 EVENT_INDEX(4));
6331 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6332 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6333 EVENT_INDEX(0));
6334 }
6336 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6337 struct amdgpu_ib *ib,
6338 unsigned vmid, bool ctx_switch)
6339 {
6340 u32 header, control = 0;
6342 if (ib->flags & AMDGPU_IB_FLAG_CE)
6343 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6344 else
6345 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6347 control |= ib->length_dw | (vmid << 24);
6349 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6350 control |= INDIRECT_BUFFER_PRE_ENB(1);
6352 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6353 gfx_v8_0_ring_emit_de_meta(ring);
6354 }
6356 amdgpu_ring_write(ring, header);
6357 amdgpu_ring_write(ring,
6358 #ifdef __BIG_ENDIAN
6359 (2 << 0) |
6360 #endif
6361 (ib->gpu_addr & 0xFFFFFFFC));
6362 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6363 amdgpu_ring_write(ring, control);
6364 }
6366 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6367 struct amdgpu_ib *ib,
6368 unsigned vmid, bool ctx_switch)
6369 {
6370 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6372 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6373 amdgpu_ring_write(ring,
6374 #ifdef __BIG_ENDIAN
6375 (2 << 0) |
6376 #endif
6377 (ib->gpu_addr & 0xFFFFFFFC));
6378 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6379 amdgpu_ring_write(ring, control);
6380 }
6382 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6383 u64 seq, unsigned flags)
6384 {
6385 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6386 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6388 /* EVENT_WRITE_EOP - flush caches, send int */
6389 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6390 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6391 EOP_TC_ACTION_EN |
6392 EOP_TC_WB_ACTION_EN |
6393 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6394 EVENT_INDEX(5)));
6395 amdgpu_ring_write(ring, addr & 0xfffffffc);
6396 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6397 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6398 amdgpu_ring_write(ring, lower_32_bits(seq));
6399 amdgpu_ring_write(ring, upper_32_bits(seq));
6400 }
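/* A pipeline sync stalls the ring's fetcher (PFP on gfx, ME on compute)
 * with a WAIT_REG_MEM on the last emitted fence value, so later commands
 * cannot start before earlier work has signalled.
 */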
6403 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6404 {
6405 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6406 uint32_t seq = ring->fence_drv.sync_seq;
6407 uint64_t addr = ring->fence_drv.gpu_addr;
6409 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6410 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6411 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6412 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6413 amdgpu_ring_write(ring, addr & 0xfffffffc);
6414 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6415 amdgpu_ring_write(ring, seq);
6416 amdgpu_ring_write(ring, 0xffffffff);
6417 amdgpu_ring_write(ring, 4); /* poll interval */
6418 }
6420 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6421 unsigned vmid, uint64_t pd_addr)
6422 {
6423 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6425 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6427 /* wait for the invalidate to complete */
6428 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6429 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6430 WAIT_REG_MEM_FUNCTION(0) | /* always */
6431 WAIT_REG_MEM_ENGINE(0))); /* me */
6432 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6433 amdgpu_ring_write(ring, 0);
6434 amdgpu_ring_write(ring, 0); /* ref */
6435 amdgpu_ring_write(ring, 0); /* mask */
6436 amdgpu_ring_write(ring, 0x20); /* poll interval */
6438 /* compute doesn't have PFP */
6439 if (usepfp) {
6440 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6441 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6442 amdgpu_ring_write(ring, 0x0);
6443 }
6444 }
6446 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6447 {
6448 return ring->adev->wb.wb[ring->wptr_offs];
6449 }
6451 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6452 {
6453 struct amdgpu_device *adev = ring->adev;
6455 /* XXX check if swapping is necessary on BE */
6456 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6457 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6458 }
6460 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6461 bool acquire)
6462 {
6463 struct amdgpu_device *adev = ring->adev;
6464 int pipe_num, tmp, reg;
6465 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6467 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6469 /* first me only has 2 entries, GFX and HP3D */
6470 if (ring->me > 0)
6471 pipe_num -= 2;
6473 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6474 tmp = RREG32(reg);
6475 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6476 WREG32(reg, tmp);
6477 }
6479 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6480 struct amdgpu_ring *ring,
6481 bool acquire)
6482 {
6483 int i, pipe;
6484 bool reserve;
6485 struct amdgpu_ring *iring;
6487 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6488 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6489 if (acquire)
6490 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6491 else
6492 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6494 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6495 /* Clear all reservations - everyone reacquires all resources */
6496 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6497 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6498 true);
6500 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6501 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6502 true);
6503 } else {
6504 /* Lower all pipes without a current reservation */
6505 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6506 iring = &adev->gfx.gfx_ring[i];
6507 pipe = amdgpu_gfx_queue_to_bit(adev,
6508 iring->me,
6509 iring->pipe,
6510 0);
6511 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6512 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6513 }
6515 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6516 iring = &adev->gfx.compute_ring[i];
6517 pipe = amdgpu_gfx_queue_to_bit(adev,
6518 iring->me,
6519 iring->pipe,
6520 0);
6521 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6522 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6523 }
6524 }
6526 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6527 }
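/* Queue priority is raised by writing CP_HQD_PIPE_PRIORITY/QUEUE_PRIORITY
 * for the HQD selected through SRBM; the 0x2/0xf values below are the
 * high-priority settings and 0x0 restores the defaults.
 */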
6529 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6530 struct amdgpu_ring *ring,
6531 bool acquire)
6532 {
6533 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6534 uint32_t queue_priority = acquire ? 0xf : 0x0;
6536 mutex_lock(&adev->srbm_mutex);
6537 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6539 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6540 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6542 vi_srbm_select(adev, 0, 0, 0, 0);
6543 mutex_unlock(&adev->srbm_mutex);
6544 }
6545 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6546 enum drm_sched_priority priority)
6547 {
6548 struct amdgpu_device *adev = ring->adev;
6549 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6551 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6552 return;
6554 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6555 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6556 }
6558 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6559 u64 addr, u64 seq,
6560 unsigned flags)
6561 {
6562 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6563 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6565 /* RELEASE_MEM - flush caches, send int */
6566 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6567 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6568 EOP_TC_ACTION_EN |
6569 EOP_TC_WB_ACTION_EN |
6570 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6571 EVENT_INDEX(5)));
6572 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6573 amdgpu_ring_write(ring, addr & 0xfffffffc);
6574 amdgpu_ring_write(ring, upper_32_bits(addr));
6575 amdgpu_ring_write(ring, lower_32_bits(seq));
6576 amdgpu_ring_write(ring, upper_32_bits(seq));
6577 }
6579 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6580 u64 seq, unsigned int flags)
6581 {
6582 /* we only allocate 32bit for each seq wb address */
6583 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6585 /* write fence seq to the "addr" */
6586 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6587 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6588 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6589 amdgpu_ring_write(ring, lower_32_bits(addr));
6590 amdgpu_ring_write(ring, upper_32_bits(addr));
6591 amdgpu_ring_write(ring, lower_32_bits(seq));
6593 if (flags & AMDGPU_FENCE_FLAG_INT) {
6594 /* set register to trigger INT */
6595 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6596 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6597 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6598 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6599 amdgpu_ring_write(ring, 0);
6600 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
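/* Emit a CONTEXT_CONTROL packet. Bit 31 of dw2 is the global load-enable;
 * the lower bits select which state groups (global config, CS SH regs,
 * per-context state, CE RAM) the CP reloads on a context switch.
 */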
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble is
		 * presented, although no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
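/* COND_EXEC support: init_cond_exec emits a COND_EXEC packet whose DW
 * count is not yet known and returns the ring offset of the placeholder;
 * patch_cond_exec later rewrites that placeholder with the real number of
 * DWs to skip, accounting for ring-buffer wraparound.
 */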
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |		/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
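/* Interrupt enable/disable helpers. EOP timestamp interrupts drive fence
 * processing; the GFX ring is controlled through CP_INT_CNTL_RING0 while
 * each MEC pipe has its own CP_MEx_PIPEy_INT_CNTL register.
 */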
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
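/* CP ECC/EDC error interrupts have to be toggled in every CP/CPC interrupt
 * control register, including one per MEC pipe, hence the long list of
 * writes below.
 */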
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;
	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}
static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;
	default:
		return -EINVAL;
	}

	/* note the inversion: stalling SQ message delivery disables the intr */
	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
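/* IRQ process callbacks. The me/pipe/queue of the originating hardware
 * queue is packed into the ring_id field of the IV entry and is decoded
 * here to find the matching amdgpu_ring.
 */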
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.");
	return 0;
}
static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	u8 enc, se_id;
	char type[20];

	/* Parse all fields according to SQ_INTERRUPT* registers */
	enc = (entry->src_data[0] >> 26) & 0x3;
	se_id = (entry->src_data[0] >> 24) & 0x3;

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 (entry->src_data[0] >> 7) & 0x1,
			 (entry->src_data[0] >> 6) & 0x1,
			 (entry->src_data[0] >> 5) & 0x1,
			 (entry->src_data[0] >> 4) & 0x1,
			 (entry->src_data[0] >> 3) & 0x1,
			 (entry->src_data[0] >> 2) & 0x1,
			 (entry->src_data[0] >> 1) & 0x1,
			 entry->src_data[0] & 0x1);
		break;
	case 1:
	case 2:
		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO("SQ %s detected: "
			 "se_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d\n",
			 type, se_id,
			 (entry->src_data[0] >> 20) & 0xf,
			 (entry->src_data[0] >> 18) & 0x3,
			 (entry->src_data[0] >> 14) & 0xf,
			 (entry->src_data[0] >> 10) & 0xf);
		break;
	default:
		DRM_ERROR("SQ invalid encoding type\n");
		return -EINVAL;
	}

	return 0;
}
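/* The KIQ currently only uses the GENERIC2 interrupt, which must be
 * enabled both in CPC_INT_CNTL and in the INT_CNTL register of whichever
 * MEC pipe the KIQ lives on.
 */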
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
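/* Function tables below: the IP block callbacks plus the per-ring-type and
 * per-interrupt-source vtables through which the amdgpu core drives the
 * GFX8 block.
 */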
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
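/* emit_frame_size is the worst-case number of DWs a single submission may
 * add to the ring on top of the IBs themselves; together with emit_ib_size
 * it is used to size the ring allocation for each job.
 */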
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jump to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
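/* CU (compute unit) enumeration. A shader array's active-CU bitmap is the
 * complement of the INACTIVE_CUS harvest fuses in CC_GC_SHADER_ARRAY_CONFIG
 * combined with any user-requested disables in GC_USER_SHADER_ARRAY_CONFIG.
 */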
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
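/* Exported IP block descriptors: version 8.0 for the discrete VI parts and
 * 8.1 for the Carrizo/Stoney class APUs; both share the same function table.
 */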
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
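/* Under SR-IOV the CP needs CE/DE metadata in the per-context save area
 * (CSA) so the hypervisor's world switch can restore in-flight state. The
 * payload layout differs when chained IBs are supported, hence the unions.
 */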
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}