2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
27 #include "amdgpu_gfx.h"
29 #include "vi_structs.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
52 #include "smu/smu_7_1_3_d.h"
54 #include "ivsrcid/ivsrcid_vislands30.h"
/* Number of GFX rings exposed by this IP block, and the per-queue MEC HPD
 * buffer size in bytes (4096 — presumably one page per compute queue;
 * confirm against the compute-queue init code).
 */
56 #define GFX8_NUM_GFX_RINGS 1
57 #define GFX8_MEC_HPD_SIZE 4096
/* Golden (recommended reset-default) GB_ADDR_CONFIG values per ASIC. */
59 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
60 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
61 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
62 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
/* Helpers that shift a field value into its position within the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layout, for building
 * the tiling-mode tables below.
 */
64 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
65 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
66 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
67 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
68 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
69 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
70 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
71 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
72 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
/* Per-client override bit masks within the RLC_CGTT_MGCG_OVERRIDE register. */
74 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
75 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
76 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
77 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
78 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
79 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
/* BPM serdes command codes: set (1) vs. clear (0) — "CLE" presumably
 * abbreviates "clear"; verify against the serdes write helper.
 */
82 #define SET_BPM_SERDES_CMD 1
83 #define CLE_BPM_SERDES_CMD 0
85 /* BPM register addresses */
87 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
88 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
89 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
90 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
91 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
95 #define RLC_FormatDirectRegListLength 14
/* Declare every firmware image this driver may request at runtime, one
 * group per supported VI-family ASIC, so module tooling (modinfo,
 * initramfs generators) knows which files to bundle.  Each group covers
 * the command-processor front ends (ce/pfp/me), the compute micro
 * engines (mec, and mec2 where present), and the RLC.  The "_2" variants
 * on Polaris are alternate builds of the same images — presumably
 * selected by hardware revision; confirm in the firmware-load code.
 */
/* Carrizo */
97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
/* Stoney (no mec2 image) */
104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
/* Tonga */
110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
/* Topaz (no mec2 image) */
117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
/* Fiji */
123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
/* Polaris10 (plus "_2" alternate builds) */
130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
/* Polaris11 (plus "_2" alternate builds) */
142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
/* Polaris12 (plus "_2" alternate builds) */
154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
/* VegaM */
166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
175 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
176 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
177 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
178 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
179 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
180 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
181 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
182 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
183 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
184 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
185 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
186 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
187 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
188 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
189 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
190 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
193 static const u32 golden_settings_tonga_a11[] =
195 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
196 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
197 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
198 mmGB_GPU_ID, 0x0000000f, 0x00000000,
199 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
200 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
201 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
202 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
203 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
204 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
205 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
206 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
207 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
208 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
209 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
210 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
213 static const u32 tonga_golden_common_all[] =
215 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
216 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
217 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
218 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
219 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
220 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
221 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
222 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
225 static const u32 tonga_mgcg_cgcg_init[] =
227 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
228 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
229 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
230 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
231 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
232 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
233 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
234 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
235 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
236 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
237 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
238 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
239 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
240 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
241 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
242 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
243 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
244 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
245 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
246 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
247 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
248 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
249 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
250 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
251 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
252 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
253 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
254 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
255 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
256 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
257 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
258 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
259 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
260 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
261 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
262 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
263 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
264 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
265 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
266 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
267 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
268 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
269 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
270 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
271 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
272 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
273 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
274 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
275 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
276 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
277 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
278 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
279 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
280 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
281 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
282 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
283 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
284 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
285 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
286 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
287 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
288 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
289 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
290 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
291 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
292 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
293 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
294 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
295 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
296 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
297 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
298 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
299 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
300 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
301 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
304 static const u32 golden_settings_vegam_a11[] =
306 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
307 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
308 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
309 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
310 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
311 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
312 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
313 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
314 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
315 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
316 mmSQ_CONFIG, 0x07f80000, 0x01180000,
317 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
318 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
319 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
320 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
321 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
322 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
325 static const u32 vegam_golden_common_all[] =
327 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
328 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
332 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
335 static const u32 golden_settings_polaris11_a11[] =
337 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
338 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
339 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
340 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
341 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
342 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
343 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
344 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
345 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
346 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
347 mmSQ_CONFIG, 0x07f80000, 0x01180000,
348 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
349 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
350 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
351 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
352 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
353 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
356 static const u32 polaris11_golden_common_all[] =
358 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
359 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
360 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
361 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
362 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
363 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
366 static const u32 golden_settings_polaris10_a11[] =
368 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
369 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
370 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
371 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
372 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
373 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
374 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
375 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
376 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
377 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
378 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
379 mmSQ_CONFIG, 0x07f80000, 0x07180000,
380 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
381 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
382 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
383 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
384 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
387 static const u32 polaris10_golden_common_all[] =
389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
391 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
392 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
393 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
394 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
395 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
396 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
399 static const u32 fiji_golden_common_all[] =
401 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
402 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
403 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
404 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
405 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
406 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
407 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
408 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
409 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
410 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
413 static const u32 golden_settings_fiji_a10[] =
415 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
416 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
417 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
418 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
419 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
420 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
421 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
422 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
423 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
424 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
425 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
428 static const u32 fiji_mgcg_cgcg_init[] =
430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
462 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
463 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
464 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
467 static const u32 golden_settings_iceland_a11[] =
469 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
470 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
471 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
472 mmGB_GPU_ID, 0x0000000f, 0x00000000,
473 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
474 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
475 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
476 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
477 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
478 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
479 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
480 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
481 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
482 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
483 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
484 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
487 static const u32 iceland_golden_common_all[] =
489 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
490 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
491 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
492 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
493 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
494 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
495 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
496 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
499 static const u32 iceland_mgcg_cgcg_init[] =
501 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
502 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
503 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
504 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
505 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
506 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
507 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
508 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
509 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
510 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
511 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
512 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
513 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
514 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
515 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
516 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
517 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
518 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
519 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
520 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
521 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
522 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
523 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
524 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
525 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
526 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
527 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
528 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
530 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
531 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
532 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
533 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
534 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
535 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
536 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
537 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
538 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
539 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
540 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
541 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
542 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
543 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
544 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
545 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
546 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
547 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
548 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
549 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
550 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
551 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
552 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
553 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
554 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
555 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
556 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
557 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
560 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
563 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
564 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
567 static const u32 cz_golden_settings_a11[] =
569 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
570 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
571 mmGB_GPU_ID, 0x0000000f, 0x00000000,
572 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
573 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
574 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
575 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
576 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
577 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
578 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
579 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
580 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
583 static const u32 cz_golden_common_all[] =
585 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
586 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
587 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
588 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
589 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
590 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
591 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
592 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
595 static const u32 cz_mgcg_cgcg_init[] =
597 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
598 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
599 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
600 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
601 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
602 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
603 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
604 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
605 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
606 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
607 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
608 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
609 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
610 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
611 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
612 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
613 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
614 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
615 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
616 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
617 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
618 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
619 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
620 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
621 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
622 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
623 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
624 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
625 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
626 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
627 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
628 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
629 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
630 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
631 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
632 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
633 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
634 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
635 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
636 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
637 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
638 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
639 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
640 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
641 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
642 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
643 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
644 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
645 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
646 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
647 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
648 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
649 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
650 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
651 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
652 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
653 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
654 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
655 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
656 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
657 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
658 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
659 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
660 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
661 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
662 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
663 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
664 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
665 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
666 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
667 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
668 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
669 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
670 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
671 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
674 static const u32 stoney_golden_settings_a11[] =
676 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
677 mmGB_GPU_ID, 0x0000000f, 0x00000000,
678 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
679 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
680 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
681 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
682 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
683 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
684 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
685 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
688 static const u32 stoney_golden_common_all[] =
690 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
691 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
692 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
693 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
694 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
695 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
696 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
697 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
700 static const u32 stoney_mgcg_cgcg_init[] =
702 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
703 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
704 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
705 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
706 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
710 static const char * const sq_edc_source_names[] = {
711 "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
712 "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
713 "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
714 "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
715 "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
716 "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
717 "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
/* Forward declarations for the per-IP setup helpers and the CE/DE
 * metadata ring emitters; all are defined later in this file.
 */
720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register
 * sequences (clock gating init, workaround settings, common config)
 * via amdgpu_device_program_register_sequence().
 *
 * NOTE(review): the switch's case labels, break statements and braces
 * were elided by this extraction (embedded numbering gaps, e.g.
 * 741->744, 796->800); each group below is attributed to an ASIC from
 * its table-name prefix.
 */
729 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
731 switch (adev->asic_type) {
/* Topaz / Iceland */
733 amdgpu_device_program_register_sequence(adev,
734 iceland_mgcg_cgcg_init,
735 ARRAY_SIZE(iceland_mgcg_cgcg_init));
736 amdgpu_device_program_register_sequence(adev,
737 golden_settings_iceland_a11,
738 ARRAY_SIZE(golden_settings_iceland_a11));
739 amdgpu_device_program_register_sequence(adev,
740 iceland_golden_common_all,
741 ARRAY_SIZE(iceland_golden_common_all));
/* Fiji (first argument of the mgcg call elided by extraction) */
744 amdgpu_device_program_register_sequence(adev,
746 ARRAY_SIZE(fiji_mgcg_cgcg_init));
747 amdgpu_device_program_register_sequence(adev,
748 golden_settings_fiji_a10,
749 ARRAY_SIZE(golden_settings_fiji_a10));
750 amdgpu_device_program_register_sequence(adev,
751 fiji_golden_common_all,
752 ARRAY_SIZE(fiji_golden_common_all));
/* Tonga */
756 amdgpu_device_program_register_sequence(adev,
757 tonga_mgcg_cgcg_init,
758 ARRAY_SIZE(tonga_mgcg_cgcg_init));
759 amdgpu_device_program_register_sequence(adev,
760 golden_settings_tonga_a11,
761 ARRAY_SIZE(golden_settings_tonga_a11));
762 amdgpu_device_program_register_sequence(adev,
763 tonga_golden_common_all,
764 ARRAY_SIZE(tonga_golden_common_all));
/* VegaM */
767 amdgpu_device_program_register_sequence(adev,
768 golden_settings_vegam_a11,
769 ARRAY_SIZE(golden_settings_vegam_a11));
770 amdgpu_device_program_register_sequence(adev,
771 vegam_golden_common_all,
772 ARRAY_SIZE(vegam_golden_common_all));
/* Polaris11 (and presumably Polaris12 via fallthrough -- labels elided) */
776 amdgpu_device_program_register_sequence(adev,
777 golden_settings_polaris11_a11,
778 ARRAY_SIZE(golden_settings_polaris11_a11));
779 amdgpu_device_program_register_sequence(adev,
780 polaris11_golden_common_all,
781 ARRAY_SIZE(polaris11_golden_common_all));
/* Polaris10 */
784 amdgpu_device_program_register_sequence(adev,
785 golden_settings_polaris10_a11,
786 ARRAY_SIZE(golden_settings_polaris10_a11));
787 amdgpu_device_program_register_sequence(adev,
788 polaris10_golden_common_all,
789 ARRAY_SIZE(polaris10_golden_common_all));
790 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
/*
 * Board-specific VBIOS I2C workaround, keyed on PCI revision and
 * subsystem vendor/device IDs (0x1002 AMD, 0x1043 ASUS, 0x1682 XFX).
 */
791 if (adev->pdev->revision == 0xc7 &&
792 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
793 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
794 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
795 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
796 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
/* Carrizo ("cz" tables; first argument of the mgcg call elided) */
800 amdgpu_device_program_register_sequence(adev,
802 ARRAY_SIZE(cz_mgcg_cgcg_init));
803 amdgpu_device_program_register_sequence(adev,
804 cz_golden_settings_a11,
805 ARRAY_SIZE(cz_golden_settings_a11));
806 amdgpu_device_program_register_sequence(adev,
807 cz_golden_common_all,
808 ARRAY_SIZE(cz_golden_common_all));
/* Stoney */
811 amdgpu_device_program_register_sequence(adev,
812 stoney_mgcg_cgcg_init,
813 ARRAY_SIZE(stoney_mgcg_cgcg_init));
814 amdgpu_device_program_register_sequence(adev,
815 stoney_golden_settings_a11,
816 ARRAY_SIZE(stoney_golden_settings_a11));
817 amdgpu_device_program_register_sequence(adev,
818 stoney_golden_common_all,
819 ARRAY_SIZE(stoney_golden_common_all));
/*
 * gfx_v8_0_scratch_init - describe the bank of 8 GFX scratch registers
 * (base mmSCRATCH_REG0) and mark all of them free in the allocation
 * bitmask used by amdgpu_gfx_scratch_get()/amdgpu_gfx_scratch_free().
 */
826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
828 adev->gfx.scratch.num_reg = 8;
829 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
830 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; /* low 8 bits set */
/*
 * gfx_v8_0_ring_test_ring - basic ring liveness test: write 0xCAFEDEAD
 * to a scratch register, submit a SET_UCONFIG_REG packet that stores
 * 0xDEADBEEF there, then poll until the value flips (or the usec
 * timeout expires). Returns 0 on success, negative errno otherwise
 * (return statements elided by the extraction).
 */
833 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
835 struct amdgpu_device *adev = ring->adev;
841 r = amdgpu_gfx_scratch_get(adev, &scratch);
845 WREG32(scratch, 0xCAFEDEAD); /* sentinel: proves the packet landed when overwritten */
846 r = amdgpu_ring_alloc(ring, 3); /* 3 dwords: header + reg offset + value */
848 goto error_free_scratch;
850 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
851 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
852 amdgpu_ring_write(ring, 0xDEADBEEF);
853 amdgpu_ring_commit(ring);
/* Busy-poll the scratch register for the marker value. */
855 for (i = 0; i < adev->usec_timeout; i++) {
856 tmp = RREG32(scratch);
857 if (tmp == 0xDEADBEEF)
862 if (i >= adev->usec_timeout)
866 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_ring_test_ib - indirect-buffer test: build a small IB whose
 * WRITE_DATA packet stores 0xDEADBEEF into a writeback (wb) slot,
 * schedule it, wait on the returned fence (up to @timeout), and verify
 * the slot changed from the 0xCAFEDEAD sentinel.
 * Error-check lines and returns were elided by the extraction.
 */
870 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
872 struct amdgpu_device *adev = ring->adev;
874 struct dma_fence *f = NULL;
881 r = amdgpu_device_wb_get(adev, &index);
885 gpu_addr = adev->wb.gpu_addr + (index * 4); /* wb slots are 32-bit */
886 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
887 memset(&ib, 0, sizeof(ib));
888 r = amdgpu_ib_get(adev, NULL, 16, &ib);
/* WRITE_DATA to memory (DST_SEL 5) with write confirmation. */
892 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
893 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
894 ib.ptr[2] = lower_32_bits(gpu_addr);
895 ib.ptr[3] = upper_32_bits(gpu_addr);
896 ib.ptr[4] = 0xDEADBEEF;
899 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
903 r = dma_fence_wait_timeout(f, false, timeout);
911 tmp = adev->wb.wb[index];
912 if (tmp == 0xDEADBEEF)
/* Cleanup path: free IB then the writeback slot. */
918 amdgpu_ib_free(adev, &ib, NULL);
921 amdgpu_device_wb_free(adev, index);
/*
 * gfx_v8_0_free_microcode - release every gfx firmware blob taken by
 * gfx_v8_0_init_microcode() and NULL the pointers.
 * release_firmware(NULL) is a no-op, so unconditional calls are safe.
 */
926 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
928 release_firmware(adev->gfx.pfp_fw);
929 adev->gfx.pfp_fw = NULL;
930 release_firmware(adev->gfx.me_fw);
931 adev->gfx.me_fw = NULL;
932 release_firmware(adev->gfx.ce_fw);
933 adev->gfx.ce_fw = NULL;
934 release_firmware(adev->gfx.rlc_fw);
935 adev->gfx.rlc_fw = NULL;
936 release_firmware(adev->gfx.mec_fw);
937 adev->gfx.mec_fw = NULL;
/*
 * MEC2 firmware only exists on ASICs other than Stoney/Topaz; the
 * release is conditional but the NULL assignment below is deliberately
 * unconditional (the pointer is never valid on those chips anyway).
 */
938 if ((adev->asic_type != CHIP_STONEY) &&
939 (adev->asic_type != CHIP_TOPAZ))
940 release_firmware(adev->gfx.mec2_fw);
941 adev->gfx.mec2_fw = NULL;
/* Scratch list allocated while parsing the RLC header. */
943 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request and validate all GFX firmware for
 * the current ASIC (PFP, ME, CE, RLC, MEC and, where present, MEC2),
 * parse the RLC v2.0 header into adev->gfx.rlc, and register each
 * blob in adev->firmware.ucode[] with page-aligned size accounting.
 *
 * For Polaris chips a "_2" firmware variant is preferred; when
 * request_firmware() reports -ENOENT the legacy name is retried.
 *
 * Returns 0 on success; on failure the elided "out:" path below
 * releases every firmware taken so far.
 * NOTE(review): many error checks, case labels and gotos were elided
 * by this extraction (gaps in the embedded numbering).
 */
946 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
948 const char *chip_name;
951 struct amdgpu_firmware_info *info = NULL;
952 const struct common_firmware_header *header = NULL;
953 const struct gfx_firmware_header_v1_0 *cp_hdr;
954 const struct rlc_firmware_header_v2_0 *rlc_hdr;
955 unsigned int *tmp = NULL, i;
/* Map asic_type to the firmware-name prefix (labels elided). */
959 switch (adev->asic_type) {
967 chip_name = "carrizo";
973 chip_name = "stoney";
976 chip_name = "polaris10";
979 chip_name = "polaris11";
982 chip_name = "polaris12";
/* PFP (prefetch parser) firmware; try "_2" variant on Polaris first. */
991 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
992 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
993 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
994 if (err == -ENOENT) {
995 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
996 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
999 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1000 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1004 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1007 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1008 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1009 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* ME (micro engine) firmware, same _2-then-fallback pattern. */
1011 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1012 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1013 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1014 if (err == -ENOENT) {
1015 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1016 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1019 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1020 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1024 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1027 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1028 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1030 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* CE (constant engine) firmware. */
1032 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1033 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1034 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1035 if (err == -ENOENT) {
1036 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1037 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1040 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1041 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1045 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1048 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1049 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1050 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1053 * Support for MCBP/Virtualization in combination with chained IBs is
1054 * formal released on feature version #46
1056 if (adev->gfx.ce_feature_version >= 46 &&
1057 adev->gfx.pfp_feature_version >= 46) {
1058 adev->virt.chained_ib_support = true;
1059 DRM_INFO("Chained IB support enabled!\n");
1061 adev->virt.chained_ib_support = false;
/* RLC firmware; parse the v2.0 header fields into adev->gfx.rlc. */
1063 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1064 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1067 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1068 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1069 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1070 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1072 adev->gfx.rlc.save_and_restore_offset =
1073 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1074 adev->gfx.rlc.clear_state_descriptor_offset =
1075 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1076 adev->gfx.rlc.avail_scratch_ram_locations =
1077 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1078 adev->gfx.rlc.reg_restore_list_size =
1079 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1080 adev->gfx.rlc.reg_list_format_start =
1081 le32_to_cpu(rlc_hdr->reg_list_format_start);
1082 adev->gfx.rlc.reg_list_format_separate_start =
1083 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1084 adev->gfx.rlc.starting_offsets_start =
1085 le32_to_cpu(rlc_hdr->starting_offsets_start);
1086 adev->gfx.rlc.reg_list_format_size_bytes =
1087 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1088 adev->gfx.rlc.reg_list_size_bytes =
1089 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/*
 * One allocation holds both lists; register_restore points at the
 * tail of register_list_format (see line 1105). Freed in
 * gfx_v8_0_free_microcode().
 */
1091 adev->gfx.rlc.register_list_format =
1092 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1093 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1095 if (!adev->gfx.rlc.register_list_format) {
/* Copy the format array out of the blob, converting endianness. */
1100 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1101 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1102 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1103 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1105 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1107 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1108 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1109 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1110 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* MEC (compute microengine) firmware. */
1112 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1113 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1114 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1115 if (err == -ENOENT) {
1116 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1117 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1120 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1121 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1125 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1128 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1129 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1130 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2 does not exist on Stoney/Topaz. */
1132 if ((adev->asic_type != CHIP_STONEY) &&
1133 (adev->asic_type != CHIP_TOPAZ)) {
1134 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1135 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1136 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1137 if (err == -ENOENT) {
1138 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1139 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1142 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1143 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1146 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1149 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1150 adev->gfx.mec2_fw->data;
1151 adev->gfx.mec2_fw_version =
1152 le32_to_cpu(cp_hdr->header.ucode_version);
1153 adev->gfx.mec2_feature_version =
1154 le32_to_cpu(cp_hdr->ucode_feature_version);
1157 adev->gfx.mec2_fw = NULL;
/*
 * Register each blob for the PSP/SMU firmware-loading framework and
 * accumulate the total (page-aligned) firmware footprint.
 */
1161 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1162 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1163 info->fw = adev->gfx.pfp_fw;
1164 header = (const struct common_firmware_header *)info->fw->data;
1165 adev->firmware.fw_size +=
1166 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1168 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1169 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1170 info->fw = adev->gfx.me_fw;
1171 header = (const struct common_firmware_header *)info->fw->data;
1172 adev->firmware.fw_size +=
1173 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1175 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1176 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1177 info->fw = adev->gfx.ce_fw;
1178 header = (const struct common_firmware_header *)info->fw->data;
1179 adev->firmware.fw_size +=
1180 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1182 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1183 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1184 info->fw = adev->gfx.rlc_fw;
1185 header = (const struct common_firmware_header *)info->fw->data;
1186 adev->firmware.fw_size +=
1187 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1189 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1190 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1191 info->fw = adev->gfx.mec_fw;
1192 header = (const struct common_firmware_header *)info->fw->data;
1193 adev->firmware.fw_size +=
1194 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1196 /* we need account JT in */
1197 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1198 adev->firmware.fw_size +=
1199 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); /* jt_size is in dwords */
1201 if (amdgpu_sriov_vf(adev)) {
1202 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1203 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1204 info->fw = adev->gfx.mec_fw;
/*
 * NOTE(review): le32_to_cpu() on the host constant 64 * PAGE_SIZE is
 * a no-op on LE and wrong-looking on BE; looks like it should just be
 * ALIGN(64 * PAGE_SIZE, PAGE_SIZE) -- confirm against upstream.
 */
1205 adev->firmware.fw_size +=
1206 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1209 if (adev->gfx.mec2_fw) {
1210 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1211 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1212 info->fw = adev->gfx.mec2_fw;
1213 header = (const struct common_firmware_header *)info->fw->data;
1214 adev->firmware.fw_size +=
1215 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Error path: drop every firmware reference taken above. */
1221 "gfx8: Failed to load firmware \"%s\"\n",
1223 release_firmware(adev->gfx.pfp_fw);
1224 adev->gfx.pfp_fw = NULL;
1225 release_firmware(adev->gfx.me_fw);
1226 adev->gfx.me_fw = NULL;
1227 release_firmware(adev->gfx.ce_fw);
1228 adev->gfx.ce_fw = NULL;
1229 release_firmware(adev->gfx.rlc_fw);
1230 adev->gfx.rlc_fw = NULL;
1231 release_firmware(adev->gfx.mec_fw);
1232 adev->gfx.mec_fw = NULL;
1233 release_firmware(adev->gfx.mec2_fw);
1234 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - serialize the clear-state buffer (CSB)
 * into @buffer as little-endian PM4 packets: PREAMBLE begin,
 * CONTEXT_CONTROL, every SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, the per-board raster config, PREAMBLE end,
 * and a final CLEAR_STATE. @buffer must hold at least
 * gfx_v8_0_get_csb_size(adev) dwords.
 */
1239 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1240 volatile u32 *buffer)
1243 const struct cs_section_def *sect = NULL;
1244 const struct cs_extent_def *ext = NULL;
1246 if (adev->gfx.rlc.cs_data == NULL) /* nothing to serialize */
1251 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1252 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1254 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1255 buffer[count++] = cpu_to_le32(0x80000000);
1256 buffer[count++] = cpu_to_le32(0x80000000);
/* Emit each context-register extent as one SET_CONTEXT_REG packet. */
1258 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1259 for (ext = sect->section; ext->extent != NULL; ++ext) {
1260 if (sect->id == SECT_CONTEXT) {
1262 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1263 buffer[count++] = cpu_to_le32(ext->reg_index -
1264 PACKET3_SET_CONTEXT_REG_START);
1265 for (i = 0; i < ext->reg_count; i++)
1266 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* PA_SC_RASTER_CONFIG/_1 come from the harvested RB configuration. */
1273 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1274 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1275 PACKET3_SET_CONTEXT_REG_START);
1276 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1277 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1279 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1280 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1282 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1283 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - copy the jump table (JT) of each CP firmware
 * image (me 0..4: CE, PFP, ME, MEC, MEC2) into the RLC cp_table BO at
 * increasing bo_offset. Each firmware header supplies the JT offset
 * and size in dwords.
 * The CHIP_CARRIZO check at 1294 presumably reduces max_me (Stoney has
 * no MEC2) -- the affected line is elided by the extraction; confirm
 * against upstream.
 */
1286 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1288 const __le32 *fw_data;
1289 volatile u32 *dst_ptr;
1290 int me, i, max_me = 4;
1292 u32 table_offset, table_size;
1294 if (adev->asic_type == CHIP_CARRIZO)
1297 /* write the cp table buffer */
1298 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1299 for (me = 0; me < max_me; me++) {
/* me == 0: CE jump table (the "if (me == 0)" line is elided). */
1301 const struct gfx_firmware_header_v1_0 *hdr =
1302 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1303 fw_data = (const __le32 *)
1304 (adev->gfx.ce_fw->data +
1305 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1306 table_offset = le32_to_cpu(hdr->jt_offset);
1307 table_size = le32_to_cpu(hdr->jt_size);
1308 } else if (me == 1) { /* PFP jump table */
1309 const struct gfx_firmware_header_v1_0 *hdr =
1310 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1311 fw_data = (const __le32 *)
1312 (adev->gfx.pfp_fw->data +
1313 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1314 table_offset = le32_to_cpu(hdr->jt_offset);
1315 table_size = le32_to_cpu(hdr->jt_size);
1316 } else if (me == 2) { /* ME jump table */
1317 const struct gfx_firmware_header_v1_0 *hdr =
1318 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1319 fw_data = (const __le32 *)
1320 (adev->gfx.me_fw->data +
1321 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1322 table_offset = le32_to_cpu(hdr->jt_offset);
1323 table_size = le32_to_cpu(hdr->jt_size);
1324 } else if (me == 3) { /* MEC jump table */
1325 const struct gfx_firmware_header_v1_0 *hdr =
1326 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1327 fw_data = (const __le32 *)
1328 (adev->gfx.mec_fw->data +
1329 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1330 table_offset = le32_to_cpu(hdr->jt_offset);
1331 table_size = le32_to_cpu(hdr->jt_size);
1332 } else if (me == 4) { /* MEC2 jump table (only when max_me allows) */
1333 const struct gfx_firmware_header_v1_0 *hdr =
1334 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1335 fw_data = (const __le32 *)
1336 (adev->gfx.mec2_fw->data +
1337 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1338 table_offset = le32_to_cpu(hdr->jt_offset);
1339 table_size = le32_to_cpu(hdr->jt_size);
/* Copy this engine's JT into the BO, dword by dword. */
1342 for (i = 0; i < table_size; i ++) {
1343 dst_ptr[bo_offset + i] =
1344 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1347 bo_offset += table_size;
/*
 * gfx_v8_0_rlc_fini - free the RLC clear-state and CP-table buffer
 * objects (amdgpu_bo_free_kernel() tolerates NULL handles).
 */
1351 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1353 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1354 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffers:
 *  - a VRAM BO holding the serialized clear-state buffer (CSB), sized
 *    by gfx_v8_0_get_csb_size() and filled by gfx_v8_0_get_csb_buffer();
 *  - on Carrizo/Stoney, an additional CP jump-table + GDS backup BO
 *    filled by cz_init_cp_jump_table().
 * Returns 0 on success, negative errno otherwise (return lines
 * elided by the extraction).
 */
1357 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1359 volatile u32 *dst_ptr;
1361 const struct cs_section_def *cs_data;
1364 adev->gfx.rlc.cs_data = vi_cs_data;
1366 cs_data = adev->gfx.rlc.cs_data;
1369 /* clear state block */
1370 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1372 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1373 AMDGPU_GEM_DOMAIN_VRAM,
1374 &adev->gfx.rlc.clear_state_obj,
1375 &adev->gfx.rlc.clear_state_gpu_addr,
1376 (void **)&adev->gfx.rlc.cs_ptr)
1378 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1379 gfx_v8_0_rlc_fini(adev); /* roll back partial allocations */
1383 /* set up the cs buffer */
1384 dst_ptr = adev->gfx.rlc.cs_ptr;
1385 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
/* BO was created reserved+mapped; unmap and release the reservation. */
1386 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1387 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1390 if ((adev->asic_type == CHIP_CARRIZO) ||
1391 (adev->asic_type == CHIP_STONEY)) {
1392 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1393 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1394 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1395 &adev->gfx.rlc.cp_table_obj,
1396 &adev->gfx.rlc.cp_table_gpu_addr,
1397 (void **)&adev->gfx.rlc.cp_table_ptr);
1399 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1403 cz_init_cp_jump_table(adev);
1405 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1406 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/* gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object. */
1412 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1414 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
/*
 * gfx_v8_0_mec_init - claim the compute queues this driver owns and
 * allocate one zeroed VRAM BO of GFX8_MEC_HPD_SIZE bytes per compute
 * ring for the hardware-queue-descriptor EOP area.
 * Returns 0 on success, negative errno otherwise (return lines
 * elided by the extraction).
 */
1417 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1421 size_t mec_hpd_size;
1423 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1425 /* take ownership of the relevant compute queues */
1426 amdgpu_gfx_compute_queue_acquire(adev);
1428 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1430 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1431 AMDGPU_GEM_DOMAIN_VRAM,
1432 &adev->gfx.mec.hpd_eop_obj,
1433 &adev->gfx.mec.hpd_eop_gpu_addr,
/* (CPU-pointer argument and error check elided by the extraction.) */
1436 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1440 memset(hpd, 0, mec_hpd_size); /* HPDs must start zeroed */
1442 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1443 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * Pre-assembled GCN3 compute shader used by the EDC GPR workaround to
 * initialize VGPRs (a long run of v_mov-style encodings ending in
 * s_barrier 0xbf8a0000 and s_endpgm 0xbf810000). Opaque machine code:
 * do not edit by hand.
 */
1448 static const u32 vgpr_init_compute_shader[] =
1450 0x7e000209, 0x7e020208,
1451 0x7e040207, 0x7e060206,
1452 0x7e080205, 0x7e0a0204,
1453 0x7e0c0203, 0x7e0e0202,
1454 0x7e100201, 0x7e120200,
1455 0x7e140209, 0x7e160208,
1456 0x7e180207, 0x7e1a0206,
1457 0x7e1c0205, 0x7e1e0204,
1458 0x7e200203, 0x7e220202,
1459 0x7e240201, 0x7e260200,
1460 0x7e280209, 0x7e2a0208,
1461 0x7e2c0207, 0x7e2e0206,
1462 0x7e300205, 0x7e320204,
1463 0x7e340203, 0x7e360202,
1464 0x7e380201, 0x7e3a0200,
1465 0x7e3c0209, 0x7e3e0208,
1466 0x7e400207, 0x7e420206,
1467 0x7e440205, 0x7e460204,
1468 0x7e480203, 0x7e4a0202,
1469 0x7e4c0201, 0x7e4e0200,
1470 0x7e500209, 0x7e520208,
1471 0x7e540207, 0x7e560206,
1472 0x7e580205, 0x7e5a0204,
1473 0x7e5c0203, 0x7e5e0202,
1474 0x7e600201, 0x7e620200,
1475 0x7e640209, 0x7e660208,
1476 0x7e680207, 0x7e6a0206,
1477 0x7e6c0205, 0x7e6e0204,
1478 0x7e700203, 0x7e720202,
1479 0x7e740201, 0x7e760200,
1480 0x7e780209, 0x7e7a0208,
1481 0x7e7c0207, 0x7e7e0206,
1482 0xbf8a0000, 0xbf810000,
/*
 * Pre-assembled GCN3 compute shader used by the EDC GPR workaround to
 * initialize SGPRs. Opaque machine code: do not edit by hand.
 */
1485 static const u32 sgpr_init_compute_shader[] =
1487 0xbe8a0100, 0xbe8c0102,
1488 0xbe8e0104, 0xbe900106,
1489 0xbe920108, 0xbe940100,
1490 0xbe960102, 0xbe980104,
1491 0xbe9a0106, 0xbe9c0108,
1492 0xbe9e0100, 0xbea00102,
1493 0xbea20104, 0xbea40106,
1494 0xbea60108, 0xbea80100,
1495 0xbeaa0102, 0xbeac0104,
1496 0xbeae0106, 0xbeb00108,
1497 0xbeb20100, 0xbeb40102,
1498 0xbeb60104, 0xbeb80106,
1499 0xbeba0108, 0xbebc0100,
1500 0xbebe0102, 0xbec00104,
1501 0xbec20106, 0xbec40108,
1502 0xbec60100, 0xbec80102,
1503 0xbee60004, 0xbee70005,
1504 0xbeea0006, 0xbeeb0007,
1505 0xbee80008, 0xbee90009,
1506 0xbefc0000, 0xbf8a0000,
1507 0xbf810000, 0x00000000,
/*
 * (register, value) pairs programmed via SET_SH_REG before dispatching
 * vgpr_init_compute_shader in gfx_v8_0_do_edc_gpr_workarounds().
 */
1510 static const u32 vgpr_init_regs[] =
1512 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, /* enable all CUs on SE0 */
1513 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1514 mmCOMPUTE_NUM_THREAD_X, 256*4,
1515 mmCOMPUTE_NUM_THREAD_Y, 1,
1516 mmCOMPUTE_NUM_THREAD_Z, 1,
1517 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1518 mmCOMPUTE_PGM_RSRC2, 20,
1519 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1520 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1521 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1522 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1523 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1524 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1525 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1526 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1527 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1528 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * (register, value) pairs for the first SGPR-init dispatch; thread
 * management mask 0x0f targets the low half of the CUs (the second
 * pass, sgpr2_init_regs, targets the other half with 0xf0).
 */
1531 static const u32 sgpr1_init_regs[] =
1533 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1534 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1535 mmCOMPUTE_NUM_THREAD_X, 256*5,
1536 mmCOMPUTE_NUM_THREAD_Y, 1,
1537 mmCOMPUTE_NUM_THREAD_Z, 1,
1538 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1539 mmCOMPUTE_PGM_RSRC2, 20,
1540 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1541 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1542 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1543 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1544 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1545 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1546 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1547 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1548 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1549 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * (register, value) pairs for the second SGPR-init dispatch; identical
 * to sgpr1_init_regs except the thread-management mask 0xf0 selects
 * the complementary set of CUs.
 */
1552 static const u32 sgpr2_init_regs[] =
1554 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1555 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1556 mmCOMPUTE_NUM_THREAD_X, 256*5,
1557 mmCOMPUTE_NUM_THREAD_Y, 1,
1558 mmCOMPUTE_NUM_THREAD_Z, 1,
1559 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1560 mmCOMPUTE_PGM_RSRC2, 20,
1561 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1562 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1563 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1564 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1565 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1566 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1567 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1568 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1569 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1570 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * SEC/DED (single-error-correct / double-error-detect) counter
 * registers; read back at the end of the EDC workaround to clear the
 * counters. NOTE(review): several entries were elided by the
 * extraction (embedded numbering gaps, e.g. 1577->1584).
 */
1573 static const u32 sec_ded_counter_registers[] =
1576 mmCPC_EDC_SCRATCH_CNT,
1577 mmCPC_EDC_UCODE_CNT,
1584 mmDC_EDC_CSINVOC_CNT,
1585 mmDC_EDC_RESTORE_CNT,
1591 mmSQC_ATC_EDC_GATCL1_CNT,
1597 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - Carrizo-only errata workaround:
 * build one IB containing three compute dispatches (one VGPR-init
 * shader pass, two complementary SGPR-init passes) so that every GPR
 * is written before EDC is armed; then enable EDC via GB_EDC_MODE /
 * CC_GC_EDC_CONFIG and read back the SEC/DED counters to clear them.
 *
 * Each dispatch section is: SET_SH_REG pairs from the *_init_regs
 * table, COMPUTE_PGM_LO/HI pointing at the shader copied into the IB,
 * a DISPATCH_DIRECT (8x1x1 groups), and an EVENT_WRITE CS-partial-flush.
 * Returns 0 on success, negative errno otherwise (return lines
 * elided by the extraction).
 */
1602 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1604 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1605 struct amdgpu_ib ib;
1606 struct dma_fence *f = NULL;
1609 unsigned total_size, vgpr_offset, sgpr_offset;
1612 /* only supported on CZ */
1613 if (adev->asic_type != CHIP_CARRIZO)
1616 /* bail if the compute ring is not ready */
1617 if (!ring->sched.ready)
/* Disable EDC while the init shaders run; restore/arm afterwards. */
1620 tmp = RREG32(mmGB_EDC_MODE);
1621 WREG32(mmGB_EDC_MODE, 0);
/*
 * Per dispatch: 3 dwords per SET_SH_REG pair, +4 for PGM_LO/HI,
 * +5 for DISPATCH_DIRECT, +2 for EVENT_WRITE (in dwords, *4 bytes).
 */
1624 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1626 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1628 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1629 total_size = ALIGN(total_size, 256);
1630 vgpr_offset = total_size;
1631 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1632 sgpr_offset = total_size;
1633 total_size += sizeof(sgpr_init_compute_shader);
1635 /* allocate an indirect buffer to put the commands in */
1636 memset(&ib, 0, sizeof(ib));
1637 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1639 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1643 /* load the compute shaders */
1644 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1645 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1647 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1648 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1650 /* init the ib length to 0 */
/* ---- pass 1: VGPR init ---- */
1654 /* write the register state for the compute dispatch */
1655 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1656 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1657 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1658 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1660 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1661 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; /* 256-byte aligned address */
1662 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1663 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1664 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1665 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1667 /* write dispatch packet */
1668 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1669 ib.ptr[ib.length_dw++] = 8; /* x */
1670 ib.ptr[ib.length_dw++] = 1; /* y */
1671 ib.ptr[ib.length_dw++] = 1; /* z */
1672 ib.ptr[ib.length_dw++] =
1673 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1675 /* write CS partial flush packet */
1676 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1677 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- pass 2: SGPR init, first CU half ---- */
1680 /* write the register state for the compute dispatch */
1681 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1682 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1683 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1684 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1686 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1687 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1688 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1689 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1690 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1691 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1693 /* write dispatch packet */
1694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1695 ib.ptr[ib.length_dw++] = 8; /* x */
1696 ib.ptr[ib.length_dw++] = 1; /* y */
1697 ib.ptr[ib.length_dw++] = 1; /* z */
1698 ib.ptr[ib.length_dw++] =
1699 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1701 /* write CS partial flush packet */
1702 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1703 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- pass 3: SGPR init, second CU half ---- */
1706 /* write the register state for the compute dispatch */
1707 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1708 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1709 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1710 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1712 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1713 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1714 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1715 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1716 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1717 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1719 /* write dispatch packet */
1720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1721 ib.ptr[ib.length_dw++] = 8; /* x */
1722 ib.ptr[ib.length_dw++] = 1; /* y */
1723 ib.ptr[ib.length_dw++] = 1; /* z */
1724 ib.ptr[ib.length_dw++] =
1725 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1727 /* write CS partial flush packet */
1728 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1729 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1731 /* shedule the ib on the ring */
1732 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1734 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1738 /* wait for the GPU to finish processing the IB */
1739 r = dma_fence_wait(f, false);
1741 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* Arm EDC: double-error detect + propagate-fed on the saved mode. */
1745 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1746 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1747 WREG32(mmGB_EDC_MODE, tmp);
1749 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1750 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1751 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1754 /* read back registers to clear the counters */
1755 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1756 RREG32(sec_ded_counter_registers[i]);
1759 amdgpu_ib_free(adev, &ib, NULL);
/*
 * gfx_v8_0_gpu_early_init - program the per-ASIC GFX configuration limits.
 *
 * Fills adev->gfx.config with hard per-family limits (shader engines, tile
 * pipes, CUs per SH, render backends, SC FIFO sizes), then reads the memory
 * controller registers to derive the DRAM row size, and finally patches the
 * ROW_SIZE field into the per-family "golden" GB_ADDR_CONFIG value before
 * caching it in adev->gfx.config.gb_addr_config.
 */
1765 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1768 u32 mc_shared_chmap, mc_arb_ramcfg;
1769 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
/* Per-ASIC limits; gb_addr_config starts from a per-family golden value. */
1773 switch (adev->asic_type) {
/* Single-SE config using the Topaz golden GB_ADDR_CONFIG (Iceland-class). */
1775 adev->gfx.config.max_shader_engines = 1;
1776 adev->gfx.config.max_tile_pipes = 2;
1777 adev->gfx.config.max_cu_per_sh = 6;
1778 adev->gfx.config.max_sh_per_se = 1;
1779 adev->gfx.config.max_backends_per_se = 2;
1780 adev->gfx.config.max_texture_channel_caches = 2;
1781 adev->gfx.config.max_gprs = 256;
1782 adev->gfx.config.max_gs_threads = 32;
1783 adev->gfx.config.max_hw_contexts = 8;
1785 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1786 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1787 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1788 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1789 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
/* 4-SE / 16-pipe config with the Tonga golden value (Fiji-class part). */
1792 adev->gfx.config.max_shader_engines = 4;
1793 adev->gfx.config.max_tile_pipes = 16;
1794 adev->gfx.config.max_cu_per_sh = 16;
1795 adev->gfx.config.max_sh_per_se = 1;
1796 adev->gfx.config.max_backends_per_se = 4;
1797 adev->gfx.config.max_texture_channel_caches = 16;
1798 adev->gfx.config.max_gprs = 256;
1799 adev->gfx.config.max_gs_threads = 32;
1800 adev->gfx.config.max_hw_contexts = 8;
1802 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1803 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1804 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1805 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1806 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Polaris11/12: SE/CU topology comes from the VBIOS via atombios. */
1808 case CHIP_POLARIS11:
1809 case CHIP_POLARIS12:
1810 ret = amdgpu_atombios_get_gfx_info(adev);
1813 adev->gfx.config.max_gprs = 256;
1814 adev->gfx.config.max_gs_threads = 32;
1815 adev->gfx.config.max_hw_contexts = 8;
1817 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1818 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1819 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1820 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1821 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
/* Polaris10: topology from VBIOS too, but uses the Tonga golden value. */
1823 case CHIP_POLARIS10:
1825 ret = amdgpu_atombios_get_gfx_info(adev);
1828 adev->gfx.config.max_gprs = 256;
1829 adev->gfx.config.max_gs_threads = 32;
1830 adev->gfx.config.max_hw_contexts = 8;
1832 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1833 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1834 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1835 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1836 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* 4-SE / 8-pipe config, Tonga golden value (Tonga-class part). */
1839 adev->gfx.config.max_shader_engines = 4;
1840 adev->gfx.config.max_tile_pipes = 8;
1841 adev->gfx.config.max_cu_per_sh = 8;
1842 adev->gfx.config.max_sh_per_se = 1;
1843 adev->gfx.config.max_backends_per_se = 2;
1844 adev->gfx.config.max_texture_channel_caches = 8;
1845 adev->gfx.config.max_gprs = 256;
1846 adev->gfx.config.max_gs_threads = 32;
1847 adev->gfx.config.max_hw_contexts = 8;
1849 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1850 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1851 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1852 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1853 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* APU config, 8 CUs/SH, Carrizo golden value (Carrizo-class APU). */
1856 adev->gfx.config.max_shader_engines = 1;
1857 adev->gfx.config.max_tile_pipes = 2;
1858 adev->gfx.config.max_sh_per_se = 1;
1859 adev->gfx.config.max_backends_per_se = 2;
1860 adev->gfx.config.max_cu_per_sh = 8;
1861 adev->gfx.config.max_texture_channel_caches = 2;
1862 adev->gfx.config.max_gprs = 256;
1863 adev->gfx.config.max_gs_threads = 32;
1864 adev->gfx.config.max_hw_contexts = 8;
1866 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1867 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1868 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1869 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1870 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Small APU config, 3 CUs/SH and only 16 GS threads (Stoney-class APU). */
1873 adev->gfx.config.max_shader_engines = 1;
1874 adev->gfx.config.max_tile_pipes = 2;
1875 adev->gfx.config.max_sh_per_se = 1;
1876 adev->gfx.config.max_backends_per_se = 1;
1877 adev->gfx.config.max_cu_per_sh = 3;
1878 adev->gfx.config.max_texture_channel_caches = 2;
1879 adev->gfx.config.max_gprs = 256;
1880 adev->gfx.config.max_gs_threads = 16;
1881 adev->gfx.config.max_hw_contexts = 8;
1883 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1884 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1885 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1886 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1887 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Fallback config for any other asic_type (2 SE / 4 pipes / 2 CUs per SH). */
1890 adev->gfx.config.max_shader_engines = 2;
1891 adev->gfx.config.max_tile_pipes = 4;
1892 adev->gfx.config.max_cu_per_sh = 2;
1893 adev->gfx.config.max_sh_per_se = 1;
1894 adev->gfx.config.max_backends_per_se = 2;
1895 adev->gfx.config.max_texture_channel_caches = 4;
1896 adev->gfx.config.max_gprs = 256;
1897 adev->gfx.config.max_gs_threads = 32;
1898 adev->gfx.config.max_hw_contexts = 8;
1900 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1901 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1902 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1903 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1904 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* NOTE(review): mc_shared_chmap is read but never consumed below — looks
 * like dead state; confirm before removing. MC_ARB_RAMCFG is cached in
 * adev->gfx.config for later consumers as well as used locally. */
1908 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1909 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1910 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1912 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1913 adev->gfx.config.mem_max_burst_length_bytes = 256;
/* APUs carve memory from system DRAM: derive row size from the DIMM
 * address-map fuses instead of MC_ARB_RAMCFG. */
1914 if (adev->flags & AMD_IS_APU) {
1915 /* Get memory bank mapping mode. */
1916 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1917 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1918 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1920 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1921 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1922 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
/* Treat out-of-range encodings (0, 3, 4, >12) as "no DIMM present". */
1924 /* Validate settings in case only one DIMM installed. */
1925 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1926 dimm00_addr_map = 0;
1927 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1928 dimm01_addr_map = 0;
1929 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1930 dimm10_addr_map = 0;
1931 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1932 dimm11_addr_map = 0;
1934 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1935 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1936 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1937 adev->gfx.config.mem_row_size_in_kb = 2;
1939 adev->gfx.config.mem_row_size_in_kb = 1;
/* dGPU path: row size = 4 bytes/column * 2^(8 + NOOFCOLS), in KB,
 * clamped to 4KB. */
1941 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1942 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1943 if (adev->gfx.config.mem_row_size_in_kb > 4)
1944 adev->gfx.config.mem_row_size_in_kb = 4;
1947 adev->gfx.config.shader_engine_tile_size = 32;
1948 adev->gfx.config.num_gpus = 1;
1949 adev->gfx.config.multi_gpu_tile_size = 64;
/* Encode the derived row size (1/2/4 KB -> 0/1/2) into GB_ADDR_CONFIG. */
1951 /* fix up row size */
1952 switch (adev->gfx.config.mem_row_size_in_kb) {
1955 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1958 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1961 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1964 adev->gfx.config.gb_addr_config = gb_addr_config;
/*
 * gfx_v8_0_compute_ring_init - set up one compute (MEC) ring.
 *
 * Assigns the ring its doorbell, its slice of the MEC HPD EOP buffer
 * (GFX8_MEC_HPD_SIZE bytes per ring) and its EOP interrupt source, then
 * registers it with the ring core. Compute rings always use doorbells.
 */
1969 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1970 int mec, int pipe, int queue)
1974 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
/* NOTE(review): ring already points at compute_ring[ring_id] above —
 * this re-assignment is redundant; confirm and drop in a cleanup. */
1976 ring = &adev->gfx.compute_ring[ring_id];
1981 ring->queue = queue;
1983 ring->ring_obj = NULL;
1984 ring->use_doorbell = true;
1985 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
/* Each ring owns a GFX8_MEC_HPD_SIZE slice of the shared EOP buffer. */
1986 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1987 + (ring_id * GFX8_MEC_HPD_SIZE);
1988 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
/* Map (me, pipe) onto the per-pipe EOP interrupt source line. */
1990 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1991 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1994 /* type-2 packets are deprecated on MEC, use type-3 instead */
1995 r = amdgpu_ring_init(adev, ring, 1024,
1996 &adev->gfx.eop_irq, irq_type);
2004 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
/*
 * gfx_v8_0_sw_init - software-side initialization of the GFX v8 IP block.
 *
 * Registers the CP/priv-reg/priv-inst/ECC/SQ interrupt sources, loads the
 * GFX microcode, allocates RLC and MEC BOs, creates the gfx and compute
 * rings, sets up KIQ and per-queue MQDs, and finally runs the early GPU
 * configuration. handle is the amdgpu_device, per the amd_ip_funcs contract.
 */
2006 static int gfx_v8_0_sw_init(void *handle)
2008 int i, j, k, r, ring_id;
2009 struct amdgpu_ring *ring;
2010 struct amdgpu_kiq *kiq;
2011 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Bigger parts get both MECs; APUs/small parts only need one. */
2013 switch (adev->asic_type) {
2017 case CHIP_POLARIS10:
2018 case CHIP_POLARIS11:
2019 case CHIP_POLARIS12:
2021 adev->gfx.mec.num_mec = 2;
2026 adev->gfx.mec.num_mec = 1;
2030 adev->gfx.mec.num_pipe_per_mec = 4;
2031 adev->gfx.mec.num_queue_per_pipe = 8;
/* EOP (end-of-pipe) interrupt shared by gfx and compute rings. */
2034 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
2038 /* Privileged reg */
2039 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
2040 &adev->gfx.priv_reg_irq);
2044 /* Privileged inst */
2045 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
2046 &adev->gfx.priv_inst_irq);
2050 /* Add CP EDC/ECC irq */
2051 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
2052 &adev->gfx.cp_ecc_error_irq);
2056 /* SQ interrupts. */
2057 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
2060 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
/* SQ interrupt details are reported from a workqueue, not IRQ context. */
2064 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2066 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2068 gfx_v8_0_scratch_init(adev);
2070 r = gfx_v8_0_init_microcode(adev);
2072 DRM_ERROR("Failed to load gfx firmware!\n");
2076 r = gfx_v8_0_rlc_init(adev);
2078 DRM_ERROR("Failed to init rlc BOs!\n");
2082 r = gfx_v8_0_mec_init(adev);
2084 DRM_ERROR("Failed to init MEC BOs!\n");
2088 /* set up the gfx ring */
2089 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2090 ring = &adev->gfx.gfx_ring[i];
2091 ring->ring_obj = NULL;
2092 sprintf(ring->name, "gfx");
2093 /* no gfx doorbells on iceland */
2094 if (adev->asic_type != CHIP_TOPAZ) {
2095 ring->use_doorbell = true;
2096 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2099 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2100 AMDGPU_CP_IRQ_GFX_EOP);
/* Iterate queue-major then pipe so rings spread across pipes first. */
2106 /* set up the compute queues - allocate horizontally across pipes */
2108 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2109 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2110 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2111 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2114 r = gfx_v8_0_compute_ring_init(adev,
/* KIQ (kernel interface queue) shares the MEC HPD sizing. */
2125 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2127 DRM_ERROR("Failed to init KIQ BOs!\n");
2131 kiq = &adev->gfx.kiq;
2132 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2136 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2137 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2141 adev->gfx.ce_ram_size = 0x8000;
2143 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini - tear down everything gfx_v8_0_sw_init created, in
 * roughly reverse order: GDS BOs, rings, MQDs/KIQ, MEC/RLC BOs, the RLC
 * clear-state BO, the CP table (Carrizo/Stoney only), and the microcode.
 */
2150 static int gfx_v8_0_sw_fini(void *handle)
2153 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2155 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2156 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2157 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2159 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2160 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2161 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2162 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2164 amdgpu_gfx_compute_mqd_sw_fini(adev);
2165 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2166 amdgpu_gfx_kiq_fini(adev);
2168 gfx_v8_0_mec_fini(adev);
2169 gfx_v8_0_rlc_fini(adev);
2170 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2171 &adev->gfx.rlc.clear_state_gpu_addr,
2172 (void **)&adev->gfx.rlc.cs_ptr);
/* Only APUs (Carrizo/Stoney) allocate a CP table BO in rlc_init. */
2173 if ((adev->asic_type == CHIP_CARRIZO) ||
2174 (adev->asic_type == CHIP_STONEY)) {
2175 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2176 &adev->gfx.rlc.cp_table_gpu_addr,
2177 (void **)&adev->gfx.rlc.cp_table_ptr);
2179 gfx_v8_0_free_microcode(adev);
2184 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2186 uint32_t *modearray, *mod2array;
2187 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2188 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2191 modearray = adev->gfx.config.tile_mode_array;
2192 mod2array = adev->gfx.config.macrotile_mode_array;
2194 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2195 modearray[reg_offset] = 0;
2197 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2198 mod2array[reg_offset] = 0;
2200 switch (adev->asic_type) {
2202 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2203 PIPE_CONFIG(ADDR_SURF_P2) |
2204 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2206 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207 PIPE_CONFIG(ADDR_SURF_P2) |
2208 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2209 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2210 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211 PIPE_CONFIG(ADDR_SURF_P2) |
2212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2214 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215 PIPE_CONFIG(ADDR_SURF_P2) |
2216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2218 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 PIPE_CONFIG(ADDR_SURF_P2) |
2220 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2221 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2222 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2223 PIPE_CONFIG(ADDR_SURF_P2) |
2224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2226 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2227 PIPE_CONFIG(ADDR_SURF_P2) |
2228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2231 PIPE_CONFIG(ADDR_SURF_P2));
2232 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2233 PIPE_CONFIG(ADDR_SURF_P2) |
2234 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2235 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2236 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 PIPE_CONFIG(ADDR_SURF_P2) |
2238 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2240 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2241 PIPE_CONFIG(ADDR_SURF_P2) |
2242 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2244 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2248 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2252 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2256 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2260 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2264 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2272 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2276 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2280 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2281 PIPE_CONFIG(ADDR_SURF_P2) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2284 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2285 PIPE_CONFIG(ADDR_SURF_P2) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2289 PIPE_CONFIG(ADDR_SURF_P2) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2293 PIPE_CONFIG(ADDR_SURF_P2) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2296 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297 PIPE_CONFIG(ADDR_SURF_P2) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2300 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301 PIPE_CONFIG(ADDR_SURF_P2) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2305 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2306 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2307 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2308 NUM_BANKS(ADDR_SURF_8_BANK));
2309 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312 NUM_BANKS(ADDR_SURF_8_BANK));
2313 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2316 NUM_BANKS(ADDR_SURF_8_BANK));
2317 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2320 NUM_BANKS(ADDR_SURF_8_BANK));
2321 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2324 NUM_BANKS(ADDR_SURF_8_BANK));
2325 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2328 NUM_BANKS(ADDR_SURF_8_BANK));
2329 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2332 NUM_BANKS(ADDR_SURF_8_BANK));
2333 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2336 NUM_BANKS(ADDR_SURF_16_BANK));
2337 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2340 NUM_BANKS(ADDR_SURF_16_BANK));
2341 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2344 NUM_BANKS(ADDR_SURF_16_BANK));
2345 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2348 NUM_BANKS(ADDR_SURF_16_BANK));
2349 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2352 NUM_BANKS(ADDR_SURF_16_BANK));
2353 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2356 NUM_BANKS(ADDR_SURF_16_BANK));
2357 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2360 NUM_BANKS(ADDR_SURF_8_BANK));
2362 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2363 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2365 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2367 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2368 if (reg_offset != 7)
2369 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2374 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2377 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2378 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2386 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2389 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2390 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2393 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2394 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2397 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2398 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2401 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2402 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2407 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2408 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2409 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2420 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2440 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2442 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2445 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2448 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2452 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2453 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2456 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2457 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2460 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2461 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2465 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2466 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2469 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2477 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2492 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2497 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2499 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2500 NUM_BANKS(ADDR_SURF_8_BANK));
2501 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504 NUM_BANKS(ADDR_SURF_8_BANK));
2505 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508 NUM_BANKS(ADDR_SURF_8_BANK));
2509 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 NUM_BANKS(ADDR_SURF_8_BANK));
2513 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2516 NUM_BANKS(ADDR_SURF_8_BANK));
2517 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 NUM_BANKS(ADDR_SURF_8_BANK));
2521 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 NUM_BANKS(ADDR_SURF_8_BANK));
2525 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2528 NUM_BANKS(ADDR_SURF_8_BANK));
2529 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532 NUM_BANKS(ADDR_SURF_8_BANK));
2533 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536 NUM_BANKS(ADDR_SURF_8_BANK));
2537 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2540 NUM_BANKS(ADDR_SURF_8_BANK));
2541 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2544 NUM_BANKS(ADDR_SURF_8_BANK));
2545 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 NUM_BANKS(ADDR_SURF_8_BANK));
2549 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2552 NUM_BANKS(ADDR_SURF_4_BANK));
2554 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2555 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2557 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2558 if (reg_offset != 7)
2559 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2563 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2566 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2567 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2570 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2574 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2575 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2578 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2579 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2582 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2584 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2586 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2590 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2591 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2592 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2593 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2594 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2596 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2597 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2598 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2600 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2601 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2609 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2616 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2629 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2634 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2636 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2637 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2640 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2641 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2642 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2644 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2645 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2646 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2648 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2649 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2650 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2652 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2653 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2654 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2658 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2665 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2666 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2673 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2681 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2686 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2688 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2689 NUM_BANKS(ADDR_SURF_16_BANK));
2690 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693 NUM_BANKS(ADDR_SURF_16_BANK));
2694 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2695 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2696 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2697 NUM_BANKS(ADDR_SURF_16_BANK));
2698 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2700 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2701 NUM_BANKS(ADDR_SURF_16_BANK));
2702 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2704 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705 NUM_BANKS(ADDR_SURF_16_BANK));
2706 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2709 NUM_BANKS(ADDR_SURF_16_BANK));
2710 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2713 NUM_BANKS(ADDR_SURF_16_BANK));
2714 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2716 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2717 NUM_BANKS(ADDR_SURF_16_BANK));
2718 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2720 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721 NUM_BANKS(ADDR_SURF_16_BANK));
2722 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2724 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2725 NUM_BANKS(ADDR_SURF_16_BANK));
2726 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729 NUM_BANKS(ADDR_SURF_16_BANK));
2730 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2733 NUM_BANKS(ADDR_SURF_8_BANK));
2734 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737 NUM_BANKS(ADDR_SURF_4_BANK));
2738 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741 NUM_BANKS(ADDR_SURF_4_BANK));
2743 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2744 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2746 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2747 if (reg_offset != 7)
2748 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2751 case CHIP_POLARIS11:
2752 case CHIP_POLARIS12:
2753 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2756 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2757 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2760 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2761 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2764 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2765 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2768 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2769 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2773 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2776 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2777 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2780 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2781 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2784 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2785 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2786 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2787 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2799 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2802 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2806 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2810 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2819 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2824 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2826 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2827 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2828 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2831 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2832 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2835 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2836 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2839 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2842 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2843 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2847 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2848 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2851 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2855 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2859 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2871 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2876 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2878 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2879 NUM_BANKS(ADDR_SURF_16_BANK));
2881 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2883 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2884 NUM_BANKS(ADDR_SURF_16_BANK));
2886 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2888 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2889 NUM_BANKS(ADDR_SURF_16_BANK));
2891 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2894 NUM_BANKS(ADDR_SURF_16_BANK));
2896 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2898 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2899 NUM_BANKS(ADDR_SURF_16_BANK));
2901 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2902 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2903 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2904 NUM_BANKS(ADDR_SURF_16_BANK));
2906 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2908 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2909 NUM_BANKS(ADDR_SURF_16_BANK));
2911 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2912 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2913 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2914 NUM_BANKS(ADDR_SURF_16_BANK));
2916 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2919 NUM_BANKS(ADDR_SURF_16_BANK));
2921 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2924 NUM_BANKS(ADDR_SURF_16_BANK));
2926 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 NUM_BANKS(ADDR_SURF_16_BANK));
2931 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2934 NUM_BANKS(ADDR_SURF_16_BANK));
2936 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2939 NUM_BANKS(ADDR_SURF_8_BANK));
2941 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2942 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2943 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2944 NUM_BANKS(ADDR_SURF_4_BANK));
2946 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2947 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2949 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2950 if (reg_offset != 7)
2951 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2954 case CHIP_POLARIS10:
2955 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2959 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2963 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2966 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2967 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2970 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2971 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2974 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2975 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2976 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2979 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2982 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2983 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2984 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2985 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2986 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2987 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2989 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2990 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2992 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2993 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2994 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3001 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3003 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3004 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3006 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3008 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3014 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3016 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3020 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3021 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3023 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3024 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3026 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3027 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3029 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3032 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3033 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3034 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3036 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3037 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3038 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3041 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3042 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3045 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3046 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3049 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3050 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3053 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3057 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3058 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3065 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3073 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3078 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3080 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3081 NUM_BANKS(ADDR_SURF_16_BANK));
3083 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3085 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3086 NUM_BANKS(ADDR_SURF_16_BANK));
3088 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3091 NUM_BANKS(ADDR_SURF_16_BANK));
3093 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3096 NUM_BANKS(ADDR_SURF_16_BANK));
3098 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3101 NUM_BANKS(ADDR_SURF_16_BANK));
3103 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3104 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3105 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3106 NUM_BANKS(ADDR_SURF_16_BANK));
3108 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3110 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3111 NUM_BANKS(ADDR_SURF_16_BANK));
3113 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3114 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3115 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3116 NUM_BANKS(ADDR_SURF_16_BANK));
3118 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121 NUM_BANKS(ADDR_SURF_16_BANK));
3123 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3126 NUM_BANKS(ADDR_SURF_16_BANK));
3128 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131 NUM_BANKS(ADDR_SURF_16_BANK));
3133 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3136 NUM_BANKS(ADDR_SURF_8_BANK));
3138 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3140 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3141 NUM_BANKS(ADDR_SURF_4_BANK));
3143 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3146 NUM_BANKS(ADDR_SURF_4_BANK));
3148 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3149 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3151 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3152 if (reg_offset != 7)
3153 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3157 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3158 PIPE_CONFIG(ADDR_SURF_P2) |
3159 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3160 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3161 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3162 PIPE_CONFIG(ADDR_SURF_P2) |
3163 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3165 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166 PIPE_CONFIG(ADDR_SURF_P2) |
3167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3169 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 PIPE_CONFIG(ADDR_SURF_P2) |
3175 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3178 PIPE_CONFIG(ADDR_SURF_P2) |
3179 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3182 PIPE_CONFIG(ADDR_SURF_P2) |
3183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3186 PIPE_CONFIG(ADDR_SURF_P2));
3187 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3188 PIPE_CONFIG(ADDR_SURF_P2) |
3189 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3191 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3192 PIPE_CONFIG(ADDR_SURF_P2) |
3193 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3195 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3196 PIPE_CONFIG(ADDR_SURF_P2) |
3197 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3199 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204 PIPE_CONFIG(ADDR_SURF_P2) |
3205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3208 PIPE_CONFIG(ADDR_SURF_P2) |
3209 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3212 PIPE_CONFIG(ADDR_SURF_P2) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3215 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3216 PIPE_CONFIG(ADDR_SURF_P2) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3219 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3223 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3224 PIPE_CONFIG(ADDR_SURF_P2) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3227 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3228 PIPE_CONFIG(ADDR_SURF_P2) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3232 PIPE_CONFIG(ADDR_SURF_P2) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3236 PIPE_CONFIG(ADDR_SURF_P2) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3240 PIPE_CONFIG(ADDR_SURF_P2) |
3241 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3260 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3263 NUM_BANKS(ADDR_SURF_8_BANK));
3264 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267 NUM_BANKS(ADDR_SURF_8_BANK));
3268 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3271 NUM_BANKS(ADDR_SURF_8_BANK));
3272 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3275 NUM_BANKS(ADDR_SURF_8_BANK));
3276 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3279 NUM_BANKS(ADDR_SURF_8_BANK));
3280 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 NUM_BANKS(ADDR_SURF_8_BANK));
3284 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287 NUM_BANKS(ADDR_SURF_8_BANK));
3288 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3291 NUM_BANKS(ADDR_SURF_16_BANK));
3292 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 NUM_BANKS(ADDR_SURF_16_BANK));
3296 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3299 NUM_BANKS(ADDR_SURF_16_BANK));
3300 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 NUM_BANKS(ADDR_SURF_16_BANK));
3304 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 NUM_BANKS(ADDR_SURF_16_BANK));
3308 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311 NUM_BANKS(ADDR_SURF_16_BANK));
3312 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3315 NUM_BANKS(ADDR_SURF_8_BANK));
3317 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3318 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3320 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3322 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3323 if (reg_offset != 7)
3324 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3329 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3333 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3334 PIPE_CONFIG(ADDR_SURF_P2) |
3335 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3336 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3337 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3338 PIPE_CONFIG(ADDR_SURF_P2) |
3339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3340 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3341 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342 PIPE_CONFIG(ADDR_SURF_P2) |
3343 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3344 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3345 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346 PIPE_CONFIG(ADDR_SURF_P2) |
3347 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3348 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3349 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3350 PIPE_CONFIG(ADDR_SURF_P2) |
3351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3352 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3353 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3354 PIPE_CONFIG(ADDR_SURF_P2) |
3355 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3356 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3357 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3358 PIPE_CONFIG(ADDR_SURF_P2) |
3359 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3360 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3361 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3362 PIPE_CONFIG(ADDR_SURF_P2));
3363 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3364 PIPE_CONFIG(ADDR_SURF_P2) |
3365 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3367 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3368 PIPE_CONFIG(ADDR_SURF_P2) |
3369 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3371 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3372 PIPE_CONFIG(ADDR_SURF_P2) |
3373 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3375 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3376 PIPE_CONFIG(ADDR_SURF_P2) |
3377 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380 PIPE_CONFIG(ADDR_SURF_P2) |
3381 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3384 PIPE_CONFIG(ADDR_SURF_P2) |
3385 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3387 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3388 PIPE_CONFIG(ADDR_SURF_P2) |
3389 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3391 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3392 PIPE_CONFIG(ADDR_SURF_P2) |
3393 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3395 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3396 PIPE_CONFIG(ADDR_SURF_P2) |
3397 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3399 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3400 PIPE_CONFIG(ADDR_SURF_P2) |
3401 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3403 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3404 PIPE_CONFIG(ADDR_SURF_P2) |
3405 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3407 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3408 PIPE_CONFIG(ADDR_SURF_P2) |
3409 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3411 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3412 PIPE_CONFIG(ADDR_SURF_P2) |
3413 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3415 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3416 PIPE_CONFIG(ADDR_SURF_P2) |
3417 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3419 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3420 PIPE_CONFIG(ADDR_SURF_P2) |
3421 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3423 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3424 PIPE_CONFIG(ADDR_SURF_P2) |
3425 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3428 PIPE_CONFIG(ADDR_SURF_P2) |
3429 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3431 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3432 PIPE_CONFIG(ADDR_SURF_P2) |
3433 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3436 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3439 NUM_BANKS(ADDR_SURF_8_BANK));
3440 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443 NUM_BANKS(ADDR_SURF_8_BANK));
3444 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3447 NUM_BANKS(ADDR_SURF_8_BANK));
3448 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3451 NUM_BANKS(ADDR_SURF_8_BANK));
3452 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3455 NUM_BANKS(ADDR_SURF_8_BANK));
3456 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459 NUM_BANKS(ADDR_SURF_8_BANK));
3460 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3463 NUM_BANKS(ADDR_SURF_8_BANK));
3464 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3467 NUM_BANKS(ADDR_SURF_16_BANK));
3468 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3471 NUM_BANKS(ADDR_SURF_16_BANK));
3472 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3475 NUM_BANKS(ADDR_SURF_16_BANK));
3476 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479 NUM_BANKS(ADDR_SURF_16_BANK));
3480 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483 NUM_BANKS(ADDR_SURF_16_BANK));
3484 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3487 NUM_BANKS(ADDR_SURF_16_BANK));
3488 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3491 NUM_BANKS(ADDR_SURF_8_BANK));
3493 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3494 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3496 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3498 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3499 if (reg_offset != 7)
3500 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
/*
 * Program GRBM_GFX_INDEX to steer subsequent register accesses at a
 * specific shader engine (SE), shader array (SH) and instance, or
 * broadcast when an argument is 0xffffffff.
 * NOTE(review): this extract is missing some lines (braces, "else"
 * keywords); code kept byte-identical to the source.
 */
3506 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3507 u32 se_num, u32 sh_num, u32 instance)
/* instance: broadcast vs. single-instance index */
3511 if (instance == 0xffffffff)
3512 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3514 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
/* shader engine: broadcast vs. single SE */
3516 if (se_num == 0xffffffff)
3517 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3519 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
/* shader array: broadcast vs. single SH */
3521 if (sh_num == 0xffffffff)
3522 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3524 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3526 WREG32(mmGRBM_GFX_INDEX, data);
/* Select a compute micro-engine/pipe/queue (vmid 0) via SRBM for
 * subsequent per-queue register accesses. */
3529 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3530 u32 me, u32 pipe, u32 q)
3532 vi_srbm_select(adev, me, pipe, q, 0);
/*
 * Return the bitmap of active render backends (RBs) for the currently
 * selected SE/SH: OR the fixed-function and user disable registers,
 * extract the disable field, invert it and mask to backends-per-SH.
 */
3535 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3539 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3540 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3542 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
/* one mask bit per backend in this shader array */
3544 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3545 adev->gfx.config.max_sh_per_se);
/* active = not disabled, within the valid-backend mask */
3547 return (~data) & mask;
/*
 * OR the per-ASIC default PA_SC_RASTER_CONFIG / _1 field values into
 * *rconf / *rconf1.  Larger chips get full SE/PKR/RB mappings; unknown
 * ASICs log an error and leave the values untouched.
 * NOTE(review): case labels and break statements are missing from this
 * extract; code kept byte-identical to the source.
 */
3551 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3553 switch (adev->asic_type) {
3556 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3557 RB_XSEL2(1) | PKR_MAP(2) |
3558 PKR_XSEL(1) | PKR_YSEL(1) |
3559 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3560 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3564 case CHIP_POLARIS10:
3565 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3566 SE_XSEL(1) | SE_YSEL(1);
3567 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3572 *rconf |= RB_MAP_PKR0(2);
3575 case CHIP_POLARIS11:
3576 case CHIP_POLARIS12:
3577 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3578 SE_XSEL(1) | SE_YSEL(1);
3586 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Write per-SE raster configs adjusted for harvested (disabled) render
 * backends.  For each shader engine the default raster_config is patched
 * so SE/PKR/RB mapping fields point only at RBs that survive the harvest
 * mask, then written with GRBM_GFX_INDEX steered at that SE.  Broadcast
 * steering is restored before returning.
 * NOTE(review): several lines (braces, else branches) are missing from
 * this extract; code kept byte-identical to the source.
 */
3592 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3593 u32 raster_config, u32 raster_config_1,
3594 unsigned rb_mask, unsigned num_rb)
3596 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3597 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3598 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3599 unsigned rb_per_se = num_rb / num_se;
3600 unsigned se_mask[4];
/* slice the global RB mask into one mask per shader engine */
3603 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3604 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3605 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3606 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
/* sanity: only 1/2/4 SEs, 1-2 SHs per SE, 1-2 RBs per packer supported */
3608 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3609 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3610 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
/* with >2 SEs, remap the SE pair if one pair is fully harvested */
3612 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3613 (!se_mask[2] && !se_mask[3]))) {
3614 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3616 if (!se_mask[0] && !se_mask[1]) {
3618 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3621 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3625 for (se = 0; se < num_se; se++) {
3626 unsigned raster_config_se = raster_config;
3627 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3628 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3629 int idx = (se / 2) * 2;
/* if one SE of this pair is harvested, force the SE map to the live one */
3631 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3632 raster_config_se &= ~SE_MAP_MASK;
3634 if (!se_mask[idx]) {
3635 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3637 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
/* likewise for the packer map when one packer lost all its RBs */
3641 pkr0_mask &= rb_mask;
3642 pkr1_mask &= rb_mask;
3643 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3644 raster_config_se &= ~PKR_MAP_MASK;
3647 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3649 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
/* and for the RB map inside packer 0 ... */
3653 if (rb_per_se >= 2) {
3654 unsigned rb0_mask = 1 << (se * rb_per_se);
3655 unsigned rb1_mask = rb0_mask << 1;
3657 rb0_mask &= rb_mask;
3658 rb1_mask &= rb_mask;
3659 if (!rb0_mask || !rb1_mask) {
3660 raster_config_se &= ~RB_MAP_PKR0_MASK;
3664 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3667 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
/* ... and inside packer 1 when present */
3671 if (rb_per_se > 2) {
3672 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3673 rb1_mask = rb0_mask << 1;
3674 rb0_mask &= rb_mask;
3675 rb1_mask &= rb_mask;
3676 if (!rb0_mask || !rb1_mask) {
3677 raster_config_se &= ~RB_MAP_PKR1_MASK;
3681 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3684 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3690 /* GRBM_GFX_INDEX has a different offset on VI */
3691 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3692 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3693 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3696 /* GRBM_GFX_INDEX has a different offset on VI */
3697 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover active render backends across all SE/SH, cache the enable
 * mask and RB count, then program raster configs: broadcast defaults
 * when nothing is harvested, otherwise per-SE harvested configs.
 * Finally cache the per-SE/SH register values for userspace queries.
 * Caller context: takes grbm_idx_mutex around GRBM_GFX_INDEX steering.
 */
3700 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3704 u32 raster_config = 0, raster_config_1 = 0;
3706 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3707 adev->gfx.config.max_sh_per_se;
3708 unsigned num_rb_pipes;
3710 mutex_lock(&adev->grbm_idx_mutex);
/* collect the active-RB bitmap from every SE/SH */
3711 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3712 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3713 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3714 data = gfx_v8_0_get_rb_active_bitmap(adev);
3715 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3716 rb_bitmap_width_per_sh);
3719 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3721 adev->gfx.config.backend_enable_mask = active_rbs;
3722 adev->gfx.config.num_rbs = hweight32(active_rbs);
3724 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3725 adev->gfx.config.max_shader_engines, 16);
3727 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
/* no harvesting (or full complement): broadcast the defaults */
3729 if (!adev->gfx.config.backend_enable_mask ||
3730 adev->gfx.config.num_rbs >= num_rb_pipes) {
3731 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3732 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3734 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3735 adev->gfx.config.backend_enable_mask,
3739 /* cache the values for userspace */
3740 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3741 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3742 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3743 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3744 RREG32(mmCC_RB_BACKEND_DISABLE);
3745 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3746 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3747 adev->gfx.config.rb_config[i][j].raster_config =
3748 RREG32(mmPA_SC_RASTER_CONFIG);
3749 adev->gfx.config.rb_config[i][j].raster_config_1 =
3750 RREG32(mmPA_SC_RASTER_CONFIG_1);
3753 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3754 mutex_unlock(&adev->grbm_idx_mutex);
3758 * gfx_v8_0_init_compute_vmid - init the SH_MEM registers for compute vmids
3760 * @adev: amdgpu_device pointer
3762 * Initialize compute vmid sh_mem registers
3765 #define DEFAULT_SH_MEM_BASES (0x6000)
3766 #define FIRST_COMPUTE_VMID (8)
3767 #define LAST_COMPUTE_VMID (16)
/*
 * Program SH_MEM_CONFIG/SH_MEM_BASES identically for each compute vmid
 * (8..15), selecting each vmid via SRBM under srbm_mutex.
 */
3768 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3771 uint32_t sh_mem_config;
3772 uint32_t sh_mem_bases;
3775 * Configure apertures:
3776 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3777 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3778 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
/* same base for shared and private apertures (hi/lo 16-bit halves) */
3780 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3782 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3783 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3784 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3785 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3786 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3787 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3789 mutex_lock(&adev->srbm_mutex);
3790 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3791 vi_srbm_select(adev, 0, 0, 0, i);
3792 /* CP and shaders */
3793 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
/* APE1 base > limit disables the APE1 aperture */
3794 WREG32(mmSH_MEM_APE1_BASE, 1);
3795 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3796 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3798 vi_srbm_select(adev, 0, 0, 0, 0);
3799 mutex_unlock(&adev->srbm_mutex);
/* Set ASIC-dependent gfx config defaults; here only whether double
 * off-chip LDS buffers are available.
 * NOTE(review): the case labels between these assignments are missing
 * from this extract; code kept byte-identical to the source. */
3802 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3804 switch (adev->asic_type) {
3806 adev->gfx.config.double_offchip_lds_buf = 1;
3810 adev->gfx.config.double_offchip_lds_buf = 0;
/*
 * One-time golden/constant register setup for the gfx block: address
 * config, tiling tables, RB setup, CU info, per-vmid SH_MEM apertures
 * (vmid 0 vs. the rest get different default MTYPEs and bases), compute
 * vmid init, PA_SC FIFO sizing and SPI arbitration priorities.
 * NOTE(review): some lines (braces, if/else around the vmid-0 split,
 * REG_SET_FIELD field arguments) are missing from this extract; code
 * kept byte-identical to the source.
 */
3815 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3817 u32 tmp, sh_static_mem_cfg;
3820 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
/* mirror gb_addr_config into the HDP and DMIF copies */
3821 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3822 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3823 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3825 gfx_v8_0_tiling_mode_table_init(adev);
3826 gfx_v8_0_setup_rb(adev);
3827 gfx_v8_0_get_cu_info(adev);
3828 gfx_v8_0_config_init(adev);
3830 /* XXX SH_MEM regs */
3831 /* where to put LDS, scratch, GPUVM in FSA64 space */
3832 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3834 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3836 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3838 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3840 mutex_lock(&adev->srbm_mutex);
/* program SH_MEM_* for every vmid the VM manager exposes */
3841 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3842 vi_srbm_select(adev, 0, 0, 0, i);
3843 /* CP and shaders */
/* kernel vmid: uncached default mtype, bases at 0 */
3845 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3846 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3847 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3848 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3849 WREG32(mmSH_MEM_CONFIG, tmp);
3850 WREG32(mmSH_MEM_BASES, 0);
/* user vmids: non-coherent default mtype, shared-aperture base */
3852 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3853 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3854 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3855 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3856 WREG32(mmSH_MEM_CONFIG, tmp);
3857 tmp = adev->gmc.shared_aperture_start >> 48;
3858 WREG32(mmSH_MEM_BASES, tmp);
/* APE1 base > limit disables the APE1 aperture */
3861 WREG32(mmSH_MEM_APE1_BASE, 1);
3862 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3864 vi_srbm_select(adev, 0, 0, 0, 0);
3865 mutex_unlock(&adev->srbm_mutex);
3867 gfx_v8_0_init_compute_vmid(adev);
3869 mutex_lock(&adev->grbm_idx_mutex);
3871 * making sure that the following register writes will be broadcasted
3872 * to all the shaders
3874 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3876 WREG32(mmPA_SC_FIFO_SIZE,
3877 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3878 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3879 (adev->gfx.config.sc_prim_fifo_size_backend <<
3880 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3881 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3882 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3883 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3884 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
/* give all four pipe order timestamps equal (priority 2) arbitration */
3886 tmp = RREG32(mmSPI_ARB_PRIORITY);
3887 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3888 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3889 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3890 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3891 WREG32(mmSPI_ARB_PRIORITY, tmp);
3893 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes masters report idle: first the per-SE/SH
 * CU masters (steered via GRBM_GFX_INDEX), then the non-CU masters
 * (SE/GC/TC0/TC1).  Each poll is bounded by adev->usec_timeout; a
 * timeout on the CU masters logs and returns early.
 */
3897 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3902 mutex_lock(&adev->grbm_idx_mutex);
3903 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3904 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3905 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3906 for (k = 0; k < adev->usec_timeout; k++) {
3907 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
/* timed out: restore broadcast steering before bailing out */
3911 if (k == adev->usec_timeout) {
3912 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3913 0xffffffff, 0xffffffff)
3914 mutex_unlock(&adev->grbm_idx_mutex);
3915 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3921 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3922 mutex_unlock(&adev->grbm_idx_mutex);
/* now wait for the non-CU serdes masters to go idle as well */
3924 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3925 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3926 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3927 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3928 for (k = 0; k < adev->usec_timeout; k++) {
3929 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Enable/disable the four CP ring-0 GUI-idle-related interrupt sources
 * (context busy/empty, compute busy, gfx idle) in one read-modify-write. */
3935 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3938 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3940 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3941 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3942 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3943 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3945 WREG32(mmCP_INT_CNTL_RING0, tmp);
/* Point the RLC at the clear-state indirect buffer (CSIB): hi/lo GPU
 * address (low bits masked to 4-byte alignment) and its size. */
3948 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3951 WREG32(mmRLC_CSIB_ADDR_HI,
3952 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3953 WREG32(mmRLC_CSIB_ADDR_LO,
3954 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3955 WREG32(mmRLC_CSIB_LENGTH,
3956 adev->gfx.rlc.clear_state_size);
/*
 * Walk the indirect portion of the RLC register-list-format blob:
 * record where each indirect entry starts (ind_start_offsets), dedupe
 * the register indices into unique_indices, and rewrite each list slot
 * to refer to its position in the unique table.  BUG_ON guards the
 * fixed-size output arrays.
 * NOTE(review): parameter lines and some loop bodies are missing from
 * this extract; code kept byte-identical to the source.
 */
3959 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3962 int *unique_indices,
3965 int *ind_start_offsets,
3970 bool new_entry = true;
3972 for (; ind_offset < list_size; ind_offset++) {
/* first word of a new entry: remember where it starts */
3976 ind_start_offsets[*offset_count] = ind_offset;
3977 *offset_count = *offset_count + 1;
3978 BUG_ON(*offset_count >= max_offset);
/* 0xFFFFFFFF terminates the current entry */
3981 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3988 /* look for the matching indice */
3990 indices < *indices_count;
3992 if (unique_indices[indices] ==
3993 register_list_format[ind_offset])
/* not seen before: append to the unique table */
3997 if (indices >= *indices_count) {
3998 unique_indices[*indices_count] =
3999 register_list_format[ind_offset];
4000 indices = *indices_count;
4001 *indices_count = *indices_count + 1;
4002 BUG_ON(*indices_count >= max_indices);
/* replace the raw index with its unique-table position */
4005 register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore lists: copy the firmware-provided format
 * list, parse its indirect section (dedupe indices, collect start
 * offsets), then write the restore list to SRM ARAM, the format list
 * and metadata to GPM scratch, and the unique indices to the SRM index
 * control registers.  Returns 0 on success (return lines are outside
 * this extract), -ENOMEM path on allocation failure presumably follows
 * the !register_list_format check — TODO confirm against full source.
 */
4009 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4012 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4013 int indices_count = 0;
4014 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4015 int offset_count = 0;
/* work on a mutable copy: parse rewrites the list in place */
4018 unsigned int *register_list_format =
4019 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4020 if (!register_list_format)
4022 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4023 adev->gfx.rlc.reg_list_format_size_bytes);
4025 gfx_v8_0_parse_ind_reg_list(register_list_format,
4026 RLC_FormatDirectRegListLength,
4027 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4030 ARRAY_SIZE(unique_indices),
4031 indirect_start_offsets,
4033 ARRAY_SIZE(indirect_start_offsets));
4035 /* save and restore list */
4036 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4038 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4039 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4040 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
/* upload the (rewritten) format list into GPM scratch */
4043 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4044 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4045 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
/* restore-list size in dwords, halved (register/value pairs) */
4047 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4048 list_size = list_size >> 1;
4049 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4050 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4052 /* starting offsets starts */
4053 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4054 adev->gfx.rlc.starting_offsets_start);
4055 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4056 WREG32(mmRLC_GPM_SCRATCH_DATA,
4057 indirect_start_offsets[i]);
4059 /* unique indices */
4060 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4061 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4062 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4063 if (unique_indices[i] != 0) {
/* low bits -> address reg, high bits -> data reg */
4064 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4065 WREG32(data + i, unique_indices[i] >> 20);
4068 kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM). */
4073 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4075 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program the RLC power-gating timing knobs: WPTR poll idle count,
 * the four RLC_PG_DELAY sub-delays, serdes command delay and the
 * GRBM register-save idle threshold for auto power gating.
 */
4078 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4082 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4084 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4085 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4086 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4087 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4088 WREG32(mmRLC_PG_DELAY, data);
4090 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4091 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Toggle SMU clock slow-down during power-up sequencing. */
4095 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4098 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Toggle SMU clock slow-down during power-down sequencing. */
4101 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4104 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable CP power gating; note the field is a DISABLE bit, so the
 * polarity is inverted (enable -> write 0). */
4107 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4109 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
/*
 * Power-gating init, per-ASIC: CZ/Stoney set up CSB, save/restore
 * lists + machine, the RLC jump table and always-on CU mask; the
 * Polaris 11/12/VegaM group does the same minus jump table and CU mask.
 */
4112 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4114 if ((adev->asic_type == CHIP_CARRIZO) ||
4115 (adev->asic_type == CHIP_STONEY)) {
4116 gfx_v8_0_init_csb(adev);
4117 gfx_v8_0_init_save_restore_list(adev);
4118 gfx_v8_0_enable_save_restore_machine(adev);
4119 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4120 gfx_v8_0_init_power_gating(adev);
4121 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4122 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4123 (adev->asic_type == CHIP_POLARIS12) ||
4124 (adev->asic_type == CHIP_VEGAM)) {
4125 gfx_v8_0_init_csb(adev);
4126 gfx_v8_0_init_save_restore_list(adev);
4127 gfx_v8_0_enable_save_restore_machine(adev);
4128 gfx_v8_0_init_power_gating(adev);
/* Halt the RLC F32 core, mask the GUI idle interrupts and wait for the
 * serdes masters to drain. */
4133 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4135 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4137 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4138 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the RLC soft reset bit (assert, then deassert; a delay line
 * between the writes lies outside this extract). */
4141 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4143 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4146 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/* Re-enable the RLC F32 core; on dGPUs also re-enable the GUI idle
 * interrupts (APUs defer that until after CP init). */
4150 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4152 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4154 /* carrizo do enable cp interrupt after cp inited */
4155 if (!(adev->flags & AMD_IS_APU))
4156 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Full RLC bring-up sequence: stop, soft-reset, (re)initialize power
 * gating state, then start. */
4161 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4163 gfx_v8_0_rlc_stop(adev);
4164 gfx_v8_0_rlc_reset(adev);
4165 gfx_v8_0_init_pg(adev);
4166 gfx_v8_0_rlc_start(adev);
/*
 * Halt or un-halt the three gfx CP micro engines (ME/PFP/CE) via
 * CP_ME_CNTL; when halting, also mark every gfx ring unschedulable.
 * NOTE(review): the if/else around the two branches is missing from
 * this extract; code kept byte-identical to the source.
 */
4171 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4174 u32 tmp = RREG32(mmCP_ME_CNTL);
4177 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4178 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4179 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4181 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4182 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4183 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4184 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4185 adev->gfx.gfx_ring[i].sched.ready = false;
4187 WREG32(mmCP_ME_CNTL, tmp);
/*
 * Compute the dword count of the clear-state buffer packet stream that
 * gfx_v8_0_cp_gfx_start() will emit: fixed preamble/context-control
 * packets plus 2 + reg_count dwords per SECT_CONTEXT extent, plus the
 * raster-config and trailing packets (fixed increments to "count" lie
 * outside this extract).
 */
4191 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4194 const struct cs_section_def *sect = NULL;
4195 const struct cs_extent_def *ext = NULL;
4197 /* begin clear state */
4199 /* context control state */
4202 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4203 for (ext = sect->section; ext->extent != NULL; ++ext) {
4204 if (sect->id == SECT_CONTEXT)
4205 count += 2 + ext->reg_count;
4210 /* pa_sc_raster_config/pa_sc_raster_config1 */
4212 /* end clear state */
/*
 * Prime the gfx ring after CP enable: program context limits, then emit
 * the clear-state preamble — context control, every SECT_CONTEXT extent
 * from vi_cs_data, the cached raster configs, CLEAR_STATE, and the CE
 * partition bases — and commit.  Ring space is sized by
 * gfx_v8_0_get_csb_size() + 4 dwords.
 */
4220 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4222 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4223 const struct cs_section_def *sect = NULL;
4224 const struct cs_extent_def *ext = NULL;
4228 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4229 WREG32(mmCP_ENDIAN_SWAP, 0);
4230 WREG32(mmCP_DEVICE_ID, 1);
4232 gfx_v8_0_cp_gfx_enable(adev, true);
4234 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4236 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4240 /* clear state buffer */
4241 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4242 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4244 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4245 amdgpu_ring_write(ring, 0x80000000);
4246 amdgpu_ring_write(ring, 0x80000000);
/* replay every context-register extent from the golden cs data */
4248 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4249 for (ext = sect->section; ext->extent != NULL; ++ext) {
4250 if (sect->id == SECT_CONTEXT) {
4251 amdgpu_ring_write(ring,
4252 PACKET3(PACKET3_SET_CONTEXT_REG,
4254 amdgpu_ring_write(ring,
4255 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4256 for (i = 0; i < ext->reg_count; i++)
4257 amdgpu_ring_write(ring, ext->extent[i]);
/* raster configs cached by gfx_v8_0_setup_rb() */
4262 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4263 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4264 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4265 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4267 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4268 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4270 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4271 amdgpu_ring_write(ring, 0);
4273 /* init the CE partitions */
4274 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4275 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4276 amdgpu_ring_write(ring, 0x8000);
4277 amdgpu_ring_write(ring, 0x8000);
4279 amdgpu_ring_commit(ring);
/*
 * Configure the CP gfx ring doorbell: set offset + enable bits when the
 * ring uses a doorbell, disable otherwise; on dGPUs also program the
 * valid doorbell range.  Topaz has no gfx doorbells and returns early.
 */
4283 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4286 /* no gfx doorbells on iceland */
4287 if (adev->asic_type == CHIP_TOPAZ)
4290 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4292 if (ring->use_doorbell) {
4293 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4294 DOORBELL_OFFSET, ring->doorbell_index);
4295 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4297 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4300 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4303 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
/* range registers below are dGPU-only */
4305 if (adev->flags & AMD_IS_APU)
4308 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4309 DOORBELL_RANGE_LOWER,
4310 AMDGPU_DOORBELL_GFX_RING0);
4311 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4313 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4314 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
/*
 * Bring up the gfx ring buffer (ring 0): program size/block size,
 * reset read/write pointers, set the rptr/wptr writeback addresses,
 * the ring base, the doorbell, then start the ring via
 * gfx_v8_0_cp_gfx_start() and run a ring test.
 */
4317 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4319 struct amdgpu_ring *ring;
4322 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4325 /* Set the write pointer delay */
4326 WREG32(mmCP_RB_WPTR_DELAY, 0);
4328 /* set the RB to use vmid 0 */
4329 WREG32(mmCP_RB_VMID, 0);
4331 /* Set ring buffer size */
4332 ring = &adev->gfx.gfx_ring[0];
4333 rb_bufsz = order_base_2(ring->ring_size / 8);
4334 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4335 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4336 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4337 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
/* big-endian hosts need byte swapping (guarded by #ifdef in full source) */
4339 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4341 WREG32(mmCP_RB0_CNTL, tmp);
4343 /* Initialize the ring buffer's read and write pointers */
4344 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4346 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4348 /* set the wb address wether it's enabled or not */
4349 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4350 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4351 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4353 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4354 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4355 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
/* rewrite CNTL without RPTR_WR_ENA to latch the pointer reset */
4357 WREG32(mmCP_RB0_CNTL, tmp);
4359 rb_addr = ring->gpu_addr >> 8;
4360 WREG32(mmCP_RB0_BASE, rb_addr);
4361 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4363 gfx_v8_0_set_cpg_door_bell(adev, ring);
4364 /* start the ring */
4365 amdgpu_ring_clear_ring(ring);
4366 gfx_v8_0_cp_gfx_start(adev);
4367 ring->sched.ready = true;
4368 r = amdgpu_ring_test_helper(ring);
4373 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4378 WREG32(mmCP_MEC_CNTL, 0);
4380 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4381 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4382 adev->gfx.compute_ring[i].sched.ready = false;
4383 adev->gfx.kiq.ring.sched.ready = false;
4389 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4392 struct amdgpu_device *adev = ring->adev;
4394 /* tell RLC which is KIQ queue */
4395 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4397 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4398 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4400 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4403 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4405 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4406 uint64_t queue_mask = 0;
4409 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4410 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4413 /* This situation may be hit in the future if a new HW
4414 * generation exposes more than 64 queues. If so, the
4415 * definition of queue_mask needs updating */
4416 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4417 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4421 queue_mask |= (1ull << i);
4424 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4426 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4430 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4431 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4432 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4433 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4434 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4435 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4436 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4437 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4438 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4439 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4440 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4441 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4444 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4445 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4446 amdgpu_ring_write(kiq_ring,
4447 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4448 amdgpu_ring_write(kiq_ring,
4449 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4450 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4451 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4452 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4453 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4454 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4455 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4456 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4459 r = amdgpu_ring_test_helper(kiq_ring);
4461 DRM_ERROR("KCQ enable failed\n");
4465 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4469 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4470 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4471 for (i = 0; i < adev->usec_timeout; i++) {
4472 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4476 if (i == adev->usec_timeout)
4479 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4480 WREG32(mmCP_HQD_PQ_RPTR, 0);
4481 WREG32(mmCP_HQD_PQ_WPTR, 0);
4486 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4488 struct amdgpu_device *adev = ring->adev;
4489 struct vi_mqd *mqd = ring->mqd_ptr;
4490 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4493 mqd->header = 0xC0310800;
4494 mqd->compute_pipelinestat_enable = 0x00000001;
4495 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4496 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4497 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4498 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4499 mqd->compute_misc_reserved = 0x00000003;
4500 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4501 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4502 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4503 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4504 eop_base_addr = ring->eop_gpu_addr >> 8;
4505 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4506 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4508 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4509 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4510 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4511 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4513 mqd->cp_hqd_eop_control = tmp;
4515 /* enable doorbell? */
4516 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4517 CP_HQD_PQ_DOORBELL_CONTROL,
4519 ring->use_doorbell ? 1 : 0);
4521 mqd->cp_hqd_pq_doorbell_control = tmp;
4523 /* set the pointer to the MQD */
4524 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4525 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4527 /* set MQD vmid to 0 */
4528 tmp = RREG32(mmCP_MQD_CONTROL);
4529 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4530 mqd->cp_mqd_control = tmp;
4532 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4533 hqd_gpu_addr = ring->gpu_addr >> 8;
4534 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4535 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4537 /* set up the HQD, this is similar to CP_RB0_CNTL */
4538 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4539 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4540 (order_base_2(ring->ring_size / 4) - 1));
4541 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4542 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4544 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4546 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4547 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4548 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4549 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4550 mqd->cp_hqd_pq_control = tmp;
4552 /* set the wb address whether it's enabled or not */
4553 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4554 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4555 mqd->cp_hqd_pq_rptr_report_addr_hi =
4556 upper_32_bits(wb_gpu_addr) & 0xffff;
4558 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4559 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4560 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4561 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4564 /* enable the doorbell if requested */
4565 if (ring->use_doorbell) {
4566 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4567 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4568 DOORBELL_OFFSET, ring->doorbell_index);
4570 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4572 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4573 DOORBELL_SOURCE, 0);
4574 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4578 mqd->cp_hqd_pq_doorbell_control = tmp;
4580 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4582 mqd->cp_hqd_pq_wptr = ring->wptr;
4583 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4585 /* set the vmid for the queue */
4586 mqd->cp_hqd_vmid = 0;
4588 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4589 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4590 mqd->cp_hqd_persistent_state = tmp;
4593 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4594 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4595 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4596 mqd->cp_hqd_ib_control = tmp;
4598 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4599 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4600 mqd->cp_hqd_iq_timer = tmp;
4602 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4603 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4604 mqd->cp_hqd_ctx_save_control = tmp;
4607 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4608 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4609 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4610 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4611 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4612 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4613 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4614 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4615 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4616 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4617 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4618 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4619 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4620 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4621 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4623 /* activate the queue */
4624 mqd->cp_hqd_active = 1;
4629 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4635 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4636 mqd_data = &mqd->cp_mqd_base_addr_lo;
4638 /* disable wptr polling */
4639 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4641 /* program all HQD registers */
4642 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4643 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4645 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4646 * This is safe since EOP RPTR==WPTR for any inactive HQD
4647 * on ASICs that do not support context-save.
4648 * EOP writes/reads can start anywhere in the ring.
4650 if (adev->asic_type != CHIP_TONGA) {
4651 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4652 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4653 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4656 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4657 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4659 /* activate the HQD */
4660 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4661 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4666 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4668 struct amdgpu_device *adev = ring->adev;
4669 struct vi_mqd *mqd = ring->mqd_ptr;
4670 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4672 gfx_v8_0_kiq_setting(ring);
4674 if (adev->in_gpu_reset) { /* for GPU_RESET case */
4675 /* reset MQD to a clean status */
4676 if (adev->gfx.mec.mqd_backup[mqd_idx])
4677 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4679 /* reset ring buffer */
4681 amdgpu_ring_clear_ring(ring);
4682 mutex_lock(&adev->srbm_mutex);
4683 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4684 gfx_v8_0_mqd_commit(adev, mqd);
4685 vi_srbm_select(adev, 0, 0, 0, 0);
4686 mutex_unlock(&adev->srbm_mutex);
4688 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4689 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4690 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4691 mutex_lock(&adev->srbm_mutex);
4692 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4693 gfx_v8_0_mqd_init(ring);
4694 gfx_v8_0_mqd_commit(adev, mqd);
4695 vi_srbm_select(adev, 0, 0, 0, 0);
4696 mutex_unlock(&adev->srbm_mutex);
4698 if (adev->gfx.mec.mqd_backup[mqd_idx])
4699 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4705 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4707 struct amdgpu_device *adev = ring->adev;
4708 struct vi_mqd *mqd = ring->mqd_ptr;
4709 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4711 if (!adev->in_gpu_reset && !adev->in_suspend) {
4712 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4713 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4714 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4715 mutex_lock(&adev->srbm_mutex);
4716 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4717 gfx_v8_0_mqd_init(ring);
4718 vi_srbm_select(adev, 0, 0, 0, 0);
4719 mutex_unlock(&adev->srbm_mutex);
4721 if (adev->gfx.mec.mqd_backup[mqd_idx])
4722 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4723 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4724 /* reset MQD to a clean status */
4725 if (adev->gfx.mec.mqd_backup[mqd_idx])
4726 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4727 /* reset ring buffer */
4729 amdgpu_ring_clear_ring(ring);
4731 amdgpu_ring_clear_ring(ring);
4736 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4738 if (adev->asic_type > CHIP_TONGA) {
4739 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4740 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4742 /* enable doorbells */
4743 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4746 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4748 struct amdgpu_ring *ring;
4751 ring = &adev->gfx.kiq.ring;
4753 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4754 if (unlikely(r != 0))
4757 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4758 if (unlikely(r != 0))
4761 gfx_v8_0_kiq_init_queue(ring);
4762 amdgpu_bo_kunmap(ring->mqd_obj);
4763 ring->mqd_ptr = NULL;
4764 amdgpu_bo_unreserve(ring->mqd_obj);
4765 ring->sched.ready = true;
4769 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4771 struct amdgpu_ring *ring = NULL;
4774 gfx_v8_0_cp_compute_enable(adev, true);
4776 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4777 ring = &adev->gfx.compute_ring[i];
4779 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4780 if (unlikely(r != 0))
4782 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4784 r = gfx_v8_0_kcq_init_queue(ring);
4785 amdgpu_bo_kunmap(ring->mqd_obj);
4786 ring->mqd_ptr = NULL;
4788 amdgpu_bo_unreserve(ring->mqd_obj);
4793 gfx_v8_0_set_mec_doorbell_range(adev);
4795 r = gfx_v8_0_kiq_kcq_enable(adev);
4799 /* Test KCQs - reversing the order of rings seems to fix ring test failure
4802 for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
4803 ring = &adev->gfx.compute_ring[i];
4804 r = amdgpu_ring_test_helper(ring);
4811 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4815 if (!(adev->flags & AMD_IS_APU))
4816 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4818 r = gfx_v8_0_kiq_resume(adev);
4822 r = gfx_v8_0_cp_gfx_resume(adev);
4826 r = gfx_v8_0_kcq_resume(adev);
4829 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4834 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4836 gfx_v8_0_cp_gfx_enable(adev, enable);
4837 gfx_v8_0_cp_compute_enable(adev, enable);
/* IP-block hw_init callback: program golden registers and constants, then
 * bring up the RLC and the CP. Returns 0 on success.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4857 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4860 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4862 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4864 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4866 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4867 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4869 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4870 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4871 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4872 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4873 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4874 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4875 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4876 amdgpu_ring_write(kiq_ring, 0);
4877 amdgpu_ring_write(kiq_ring, 0);
4878 amdgpu_ring_write(kiq_ring, 0);
4880 r = amdgpu_ring_test_helper(kiq_ring);
4882 DRM_ERROR("KCQ disable failed\n");
4887 static bool gfx_v8_0_is_idle(void *handle)
4889 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4891 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4892 || RREG32(mmGRBM_STATUS2) != 0x8)
4898 static bool gfx_v8_0_rlc_is_idle(void *handle)
4900 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4902 if (RREG32(mmGRBM_STATUS2) != 0x8)
4908 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4911 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4913 for (i = 0; i < adev->usec_timeout; i++) {
4914 if (gfx_v8_0_rlc_is_idle(handle))
4922 static int gfx_v8_0_wait_for_idle(void *handle)
4925 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4927 for (i = 0; i < adev->usec_timeout; i++) {
4928 if (gfx_v8_0_is_idle(handle))
4936 static int gfx_v8_0_hw_fini(void *handle)
4938 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4940 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4941 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4943 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4945 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4947 /* disable KCQ to avoid CPC touch memory not valid anymore */
4948 gfx_v8_0_kcq_disable(adev);
4950 if (amdgpu_sriov_vf(adev)) {
4951 pr_debug("For SRIOV client, shouldn't do anything.\n");
4954 adev->gfx.rlc.funcs->enter_safe_mode(adev);
4955 if (!gfx_v8_0_wait_for_idle(adev))
4956 gfx_v8_0_cp_enable(adev, false);
4958 pr_err("cp is busy, skip halt cp\n");
4959 if (!gfx_v8_0_wait_for_rlc_idle(adev))
4960 gfx_v8_0_rlc_stop(adev);
4962 pr_err("rlc is busy, skip halt rlc\n");
4963 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/* IP-block suspend callback: identical to hw_fini for this block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
/* IP-block resume callback: identical to hw_init for this block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4977 static bool gfx_v8_0_check_soft_reset(void *handle)
4979 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4980 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4984 tmp = RREG32(mmGRBM_STATUS);
4985 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4986 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4987 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4988 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4989 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4990 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4991 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4992 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4993 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4994 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4995 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4996 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4997 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5001 tmp = RREG32(mmGRBM_STATUS2);
5002 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5003 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5004 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5006 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5007 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5008 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5009 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5011 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5013 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5015 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5016 SOFT_RESET_GRBM, 1);
5020 tmp = RREG32(mmSRBM_STATUS);
5021 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5022 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5023 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5024 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5025 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5026 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5028 if (grbm_soft_reset || srbm_soft_reset) {
5029 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5030 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5033 adev->gfx.grbm_soft_reset = 0;
5034 adev->gfx.srbm_soft_reset = 0;
5039 static int gfx_v8_0_pre_soft_reset(void *handle)
5041 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5044 if ((!adev->gfx.grbm_soft_reset) &&
5045 (!adev->gfx.srbm_soft_reset))
5048 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5049 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5052 gfx_v8_0_rlc_stop(adev);
5054 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5055 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5056 /* Disable GFX parsing/prefetching */
5057 gfx_v8_0_cp_gfx_enable(adev, false);
5059 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5060 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5061 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5062 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5065 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5066 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5068 mutex_lock(&adev->srbm_mutex);
5069 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5070 gfx_v8_0_deactivate_hqd(adev, 2);
5071 vi_srbm_select(adev, 0, 0, 0, 0);
5072 mutex_unlock(&adev->srbm_mutex);
5074 /* Disable MEC parsing/prefetching */
5075 gfx_v8_0_cp_compute_enable(adev, false);
5081 static int gfx_v8_0_soft_reset(void *handle)
5083 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5084 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5087 if ((!adev->gfx.grbm_soft_reset) &&
5088 (!adev->gfx.srbm_soft_reset))
5091 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5092 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5094 if (grbm_soft_reset || srbm_soft_reset) {
5095 tmp = RREG32(mmGMCON_DEBUG);
5096 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5097 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5098 WREG32(mmGMCON_DEBUG, tmp);
5102 if (grbm_soft_reset) {
5103 tmp = RREG32(mmGRBM_SOFT_RESET);
5104 tmp |= grbm_soft_reset;
5105 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5106 WREG32(mmGRBM_SOFT_RESET, tmp);
5107 tmp = RREG32(mmGRBM_SOFT_RESET);
5111 tmp &= ~grbm_soft_reset;
5112 WREG32(mmGRBM_SOFT_RESET, tmp);
5113 tmp = RREG32(mmGRBM_SOFT_RESET);
5116 if (srbm_soft_reset) {
5117 tmp = RREG32(mmSRBM_SOFT_RESET);
5118 tmp |= srbm_soft_reset;
5119 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5120 WREG32(mmSRBM_SOFT_RESET, tmp);
5121 tmp = RREG32(mmSRBM_SOFT_RESET);
5125 tmp &= ~srbm_soft_reset;
5126 WREG32(mmSRBM_SOFT_RESET, tmp);
5127 tmp = RREG32(mmSRBM_SOFT_RESET);
5130 if (grbm_soft_reset || srbm_soft_reset) {
5131 tmp = RREG32(mmGMCON_DEBUG);
5132 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5133 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5134 WREG32(mmGMCON_DEBUG, tmp);
5137 /* Wait a little for things to settle down */
5143 static int gfx_v8_0_post_soft_reset(void *handle)
5145 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5146 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5148 if ((!adev->gfx.grbm_soft_reset) &&
5149 (!adev->gfx.srbm_soft_reset))
5152 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5153 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5155 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5156 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5157 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5158 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5161 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5162 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5164 mutex_lock(&adev->srbm_mutex);
5165 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5166 gfx_v8_0_deactivate_hqd(adev, 2);
5167 vi_srbm_select(adev, 0, 0, 0, 0);
5168 mutex_unlock(&adev->srbm_mutex);
5170 gfx_v8_0_kiq_resume(adev);
5171 gfx_v8_0_kcq_resume(adev);
5174 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5175 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5176 gfx_v8_0_cp_gfx_resume(adev);
5178 gfx_v8_0_rlc_start(adev);
5184 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5186 * @adev: amdgpu_device pointer
5188 * Fetches a GPU clock counter snapshot.
5189 * Returns the 64 bit clock counter snapshot.
5191 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5195 mutex_lock(&adev->gfx.gpu_clock_mutex);
5196 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5197 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5198 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5199 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5203 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5205 uint32_t gds_base, uint32_t gds_size,
5206 uint32_t gws_base, uint32_t gws_size,
5207 uint32_t oa_base, uint32_t oa_size)
5210 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5211 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5212 WRITE_DATA_DST_SEL(0)));
5213 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5214 amdgpu_ring_write(ring, 0);
5215 amdgpu_ring_write(ring, gds_base);
5218 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5219 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5220 WRITE_DATA_DST_SEL(0)));
5221 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5222 amdgpu_ring_write(ring, 0);
5223 amdgpu_ring_write(ring, gds_size);
5226 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5227 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5228 WRITE_DATA_DST_SEL(0)));
5229 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5230 amdgpu_ring_write(ring, 0);
5231 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5234 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5235 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5236 WRITE_DATA_DST_SEL(0)));
5237 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5238 amdgpu_ring_write(ring, 0);
5239 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5242 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5244 WREG32(mmSQ_IND_INDEX,
5245 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5246 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5247 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5248 (SQ_IND_INDEX__FORCE_READ_MASK));
5249 return RREG32(mmSQ_IND_DATA);
5252 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5253 uint32_t wave, uint32_t thread,
5254 uint32_t regno, uint32_t num, uint32_t *out)
5256 WREG32(mmSQ_IND_INDEX,
5257 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5258 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5259 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5260 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5261 (SQ_IND_INDEX__FORCE_READ_MASK) |
5262 (SQ_IND_INDEX__AUTO_INCR_MASK));
5264 *(out++) = RREG32(mmSQ_IND_DATA);
5267 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5269 /* type 0 wave data */
5270 dst[(*no_fields)++] = 0;
5271 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5272 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5273 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5274 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5275 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5276 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5277 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5278 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5279 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5280 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5281 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5282 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5283 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5284 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5285 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5286 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5287 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5288 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5291 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5292 uint32_t wave, uint32_t start,
5293 uint32_t size, uint32_t *dst)
5296 adev, simd, wave, 0,
5297 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5301 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5302 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5303 .select_se_sh = &gfx_v8_0_select_se_sh,
5304 .read_wave_data = &gfx_v8_0_read_wave_data,
5305 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5306 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5309 static int gfx_v8_0_early_init(void *handle)
5311 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5313 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5314 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5315 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5316 gfx_v8_0_set_ring_funcs(adev);
5317 gfx_v8_0_set_irq_funcs(adev);
5318 gfx_v8_0_set_gds_init(adev);
5319 gfx_v8_0_set_rlc_funcs(adev);
5324 static int gfx_v8_0_late_init(void *handle)
5326 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5329 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5333 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5337 /* requires IBs so do in late init after IB pool is initialized */
5338 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5342 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5344 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5348 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5351 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5359 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5362 if (((adev->asic_type == CHIP_POLARIS11) ||
5363 (adev->asic_type == CHIP_POLARIS12) ||
5364 (adev->asic_type == CHIP_VEGAM)) &&
5365 adev->powerplay.pp_funcs->set_powergating_by_smu)
5366 /* Send msg to SMU via Powerplay */
5367 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5369 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Enable/disable dynamic per-CU medium-grain power gating in the RLC. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
/* Enable/disable quick medium-grain power gating (Polaris11-class parts). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
/* Enable/disable coarse-grain GFX power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney).  The dummy
 * register read afterwards forces GFX awake so the new setting takes hold.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (enable)
		RREG32(mmDB_RENDER_CONTROL);
}
/* Apply coarse-grain (and optionally pipeline) GFX power gating according to
 * the device's pg_flags.  Disabling always clears both gates.
 */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
/* IP-block powergating entry point.  Enters RLC safe mode around the
 * register updates when any relevant PG feature is supported, then programs
 * the per-ASIC set of power-gating features.  SR-IOV guests skip everything
 * (the host owns PG).
 * NOTE(review): elided case labels/braces restored to match upstream
 * gfx_v8_0.c — verify against the tree.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->exit_safe_mode(adev);

	return 0;
}
/* Report the currently-active clockgating features by reading back the
 * relevant hardware registers and OR-ing flags into *flags.  Note the
 * override bits are active-low: a clear override means the feature is on.
 * SR-IOV guests cannot read these registers, so report nothing.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
/* Broadcast a BPM serdes command @cmd at BPM register @reg_addr to every
 * CU/non-CU master: select all SE/SH, mask in all masters, then program
 * RLC_SERDES_WR_CTRL.  Stoney lacks the BPM_DATA/REG_ADDR fields, hence its
 * shorter clear mask.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5567 #define MSG_ENTER_RLC_SAFE_MODE 1
5568 #define MSG_EXIT_RLC_SAFE_MODE 0
5569 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5570 #define RLC_GPR_REG2__REQ__SHIFT 0
5571 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5572 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
/* Put the RLC into safe mode so clock/power-gating registers can be changed
 * without racing the RLC firmware.  No-op when the RLC F32 core is not
 * running or no CG feature needs it.  Polls (usec_timeout iterations) first
 * for GFX clocks/power to report on, then for the CMD handshake bit to clear.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1, MESSAGE=1 requests safe-mode entry */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clock and power status to be asserted */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge the command */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
/* Leave RLC safe mode (counterpart of iceland_enter_rlc_safe_mode).
 * CMD=1 with MESSAGE=0 requests exit; then poll until the RLC clears the
 * CMD handshake bit.  No-op when the RLC F32 core is not running.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
/* RLC safe-mode callbacks; installed into adev->gfx.rlc.funcs. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
/* Enable or disable medium-grain clock gating (MGCG), memory light sleep
 * (MGLS for RLC and CP) and tree-shade gating (CGTS), in the numbered order
 * the comments describe.  The whole sequence runs inside RLC safe mode; the
 * serdes waits ensure every CU master observed the previous step.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			/* APUs keep the GRBM override set */
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain light
 * sleep (CGLS).  Runs inside RLC safe mode; GUI-idle interrupts are toggled
 * around the disable path because CGCG relies on them.  The four back-to-back
 * CB_CGTT_SCLK_CTRL reads force the clocks awake before reprogramming.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* re-assert the CGCG and CGLS overrides */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/* Update both clock-gating domains in the hardware-required order:
 * when enabling, MGCG before CGCG; when disabling, CGCG before MGCG.
 * Always returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
/* Tonga clock-gating: build Powerplay CG/MG messages from cg_flags and hand
 * them to the SMU.  UNGATE zeroes pp_state so the SMU disables the feature
 * while pp_support_state still advertises it.  Always returns 0.
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
/* Polaris clock-gating: like the Tonga variant but with three extra SMU
 * message groups — 3D (coarse-grain 3D CGCG/CGLS), RLC light sleep and CP
 * light sleep.  UNGATE zeroes pp_state before each message.  Returns 0.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CP,
			pp_support_state,
			pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
/* IP-block clockgating entry point: dispatch to the per-ASIC update routine.
 * SR-IOV guests skip CG entirely (the host owns it).  Always returns 0.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6037 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6039 return ring->adev->wb.wb[ring->rptr_offs];
6042 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6044 struct amdgpu_device *adev = ring->adev;
6046 if (ring->use_doorbell)
6047 /* XXX check if swapping is necessary on BE */
6048 return ring->adev->wb.wb[ring->wptr_offs];
6050 return RREG32(mmCP_RB0_WPTR);
/* Publish the GFX ring's write pointer to the hardware: via writeback slot +
 * doorbell when enabled, otherwise via CP_RB0_WPTR (with a read-back to
 * flush the posted write).
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
/* Emit a WAIT_REG_MEM that writes GPU_HDP_FLUSH_REQ and waits for the
 * matching done bit, flushing the HDP cache from the ring.  Compute/KIQ
 * rings pick a per-MEC/pipe done bit and use the ME engine; GFX uses CP0
 * and the PFP engine.
 * NOTE(review): elided switch/else lines restored from upstream — verify.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
/* Emit a VS partial flush followed by a VGT flush (two EVENT_WRITE packets);
 * drains the geometry pipeline before a context switch.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
/* Emit an indirect buffer on the GFX ring.  CE IBs use the _CONST packet.
 * Under SR-IOV, preemptible DE IBs additionally get the preamble-enable bit
 * and de-meta data emitted first.  The low two address bits must be zero
 * (16-byte-aligned IB).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_job *job,
				      struct amdgpu_ib *ib,
				      uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
/* Emit an indirect buffer on a compute ring: plain INDIRECT_BUFFER packet
 * with the VALID bit, IB length and VMID packed into the control word.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
/* Emit a fence on the GFX ring via EVENT_WRITE_EOP: flush TC/TCL1 caches,
 * write @seq (32 or 64 bit per flags) to @addr, optionally raising an
 * interrupt.  @addr must be 4-byte aligned (low bits masked off).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
/* Emit a WAIT_REG_MEM that blocks the ring until its own fence memory equals
 * the latest synced sequence number — i.e. wait for all prior work.  GFX
 * rings wait on the PFP engine, compute on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
/* Emit a VM TLB flush: the common GMC helper emits the invalidate request,
 * then we wait for VM_INVALIDATE_REQUEST to read back zero.  GFX rings also
 * sync PFP to ME so the PFP doesn't prefetch through stale mappings.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6225 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6227 return ring->adev->wb.wb[ring->wptr_offs];
/* Publish a compute ring's write pointer: update the writeback slot, then
 * ring the doorbell (compute rings always use doorbells).
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
/* Set the SPI wave-launch percentage for the pipe backing @ring: full
 * (VALUE mask) when @acquire, minimal (0x1) otherwise.  Used to throttle
 * pipes that lost a priority reservation.
 * NOTE(review): the pipe_num adjustment and RMW lines were elided in this
 * view and restored from upstream — verify.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
/* Track pipe reservations in a shared bitmap (under pipe_reserve_mutex) and
 * reprogram per-pipe wave-launch percentages: when no pipe holds a
 * reservation everyone runs at full rate, otherwise only reserved pipes do.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
/* Program the HQD pipe/queue priority registers for @ring, selecting its
 * me/pipe/queue via SRBM (under srbm_mutex).  acquire=high, release=zero.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
/* Scheduler priority hook for compute rings: raise HQD priority and reserve
 * pipe resources when entering high HW priority; release both otherwise.
 * No-op for non-compute rings.
 */
static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}
/* Emit a fence on a compute ring via RELEASE_MEM: flush TC/TCL1 caches,
 * write @seq to @addr (32 or 64 bit per flags), optionally interrupting.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
/* Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit @seq to @addr
 * (64-bit fences are not supported — the KIQ writeback slot is 32 bit),
 * then optionally poke CPC_INT_STATUS to raise the interrupt (src_id 178).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
/* Emit a SWITCH_BUFFER packet (flips the CE/DE buffer pair). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
/* Emit a CONTEXT_CONTROL packet whose load bits depend on @flags: on a real
 * context switch, flush the VGT and request (re)loading of global config,
 * shader and per-context state, plus CE RAM when a preamble is present.
 * NOTE(review): the elided dw2 bit values were restored from upstream
 * gfx_v8_0.c — verify.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
/* Emit a COND_EXEC packet testing *cond_exe_gpu_addr, with a 0x55aa55aa
 * placeholder for the skip count.  Returns the ring offset of that
 * placeholder so emit_patch_cond_exec() can fill it in later.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
/* Patch the COND_EXEC placeholder at @offset with the number of DWs emitted
 * since it, so the CP knows how much to skip when the condition is false.
 * The else branch handles ring-buffer wraparound between init and patch.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
/* Emit a COPY_DATA packet copying register @reg into the SR-IOV
 * register-value writeback slot, with write confirmation; used by the KIQ
 * register-read path.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
/* Emit a WRITE_DATA packet writing @val to register @reg.  GFX rings use the
 * PFP engine with write confirm; KIQ only sets the no-increment bit; other
 * ring types just ask for write confirm.
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6489 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6491 struct amdgpu_device *adev = ring->adev;
6494 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6495 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6496 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6497 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6498 WREG32(mmSQ_CMD, value);
6501 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6502 enum amdgpu_interrupt_state state)
6504 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6505 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6508 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6510 enum amdgpu_interrupt_state state)
6512 u32 mec_int_cntl, mec_int_cntl_reg;
6515 * amdgpu controls only the first MEC. That's why this function only
6516 * handles the setting of interrupts for this specific MEC. All other
6517 * pipes' interrupts are set by amdkfd.
6523 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6526 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6529 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6532 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6535 DRM_DEBUG("invalid pipe %d\n", pipe);
6539 DRM_DEBUG("invalid me %d\n", me);
6544 case AMDGPU_IRQ_STATE_DISABLE:
6545 mec_int_cntl = RREG32(mec_int_cntl_reg);
6546 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6547 WREG32(mec_int_cntl_reg, mec_int_cntl);
6549 case AMDGPU_IRQ_STATE_ENABLE:
6550 mec_int_cntl = RREG32(mec_int_cntl_reg);
6551 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6552 WREG32(mec_int_cntl_reg, mec_int_cntl);
6559 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6560 struct amdgpu_irq_src *source,
6562 enum amdgpu_interrupt_state state)
6564 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6565 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6570 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6571 struct amdgpu_irq_src *source,
6573 enum amdgpu_interrupt_state state)
6575 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6576 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6581 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6582 struct amdgpu_irq_src *src,
6584 enum amdgpu_interrupt_state state)
6587 case AMDGPU_CP_IRQ_GFX_EOP:
6588 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6590 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6591 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6593 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6594 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6596 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6597 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6599 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6600 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6602 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6603 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6605 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6606 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6608 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6609 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6611 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6612 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6620 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6621 struct amdgpu_irq_src *source,
6623 enum amdgpu_interrupt_state state)
6628 case AMDGPU_IRQ_STATE_DISABLE:
6632 case AMDGPU_IRQ_STATE_ENABLE:
6640 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6641 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6642 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6643 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6644 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6645 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6647 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6649 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6651 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6653 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6655 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6657 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6659 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6665 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6666 struct amdgpu_irq_src *source,
6668 enum amdgpu_interrupt_state state)
6673 case AMDGPU_IRQ_STATE_DISABLE:
6677 case AMDGPU_IRQ_STATE_ENABLE:
6685 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6691 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6692 struct amdgpu_irq_src *source,
6693 struct amdgpu_iv_entry *entry)
6696 u8 me_id, pipe_id, queue_id;
6697 struct amdgpu_ring *ring;
6699 DRM_DEBUG("IH: CP EOP\n");
6700 me_id = (entry->ring_id & 0x0c) >> 2;
6701 pipe_id = (entry->ring_id & 0x03) >> 0;
6702 queue_id = (entry->ring_id & 0x70) >> 4;
6706 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6710 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6711 ring = &adev->gfx.compute_ring[i];
6712 /* Per-queue interrupt is supported for MEC starting from VI.
6713 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6715 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6716 amdgpu_fence_process(ring);
6723 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6724 struct amdgpu_iv_entry *entry)
6726 u8 me_id, pipe_id, queue_id;
6727 struct amdgpu_ring *ring;
6730 me_id = (entry->ring_id & 0x0c) >> 2;
6731 pipe_id = (entry->ring_id & 0x03) >> 0;
6732 queue_id = (entry->ring_id & 0x70) >> 4;
6736 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6740 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6741 ring = &adev->gfx.compute_ring[i];
6742 if (ring->me == me_id && ring->pipe == pipe_id &&
6743 ring->queue == queue_id)
6744 drm_sched_fault(&ring->sched);
/* Handler for privileged register access faults from the CP. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
/* Handler for illegal instruction faults from the CP. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
/* Handler for CP EDC/ECC error interrupts: log only, no recovery. */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.");
	return 0;
}
6776 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6778 u32 enc, se_id, sh_id, cu_id;
6780 int sq_edc_source = -1;
6782 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6783 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6787 DRM_INFO("SQ general purpose intr detected:"
6788 "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6789 "host_cmd_overflow %d, cmd_timestamp %d,"
6790 "reg_timestamp %d, thread_trace_buff_full %d,"
6791 "wlt %d, thread_trace %d.\n",
6793 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6794 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6795 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6796 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6797 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6798 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6799 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6800 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6806 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6807 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6810 * This function can be called either directly from ISR
6811 * or from BH in which case we can access SQ_EDC_INFO
6815 mutex_lock(&adev->grbm_idx_mutex);
6816 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6818 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6820 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6821 mutex_unlock(&adev->grbm_idx_mutex);
6825 sprintf(type, "instruction intr");
6827 sprintf(type, "EDC/ECC error");
6831 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6832 "trap %s, sq_ed_info.source %s.\n",
6833 type, se_id, sh_id, cu_id,
6834 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6835 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6836 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6837 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6838 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6842 DRM_ERROR("SQ invalid encoding type\n.");
6846 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6849 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6850 struct sq_work *sq_work = container_of(work, struct sq_work, work);
6852 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6855 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6856 struct amdgpu_irq_src *source,
6857 struct amdgpu_iv_entry *entry)
6859 unsigned ih_data = entry->src_data[0];
6862 * Try to submit work so SQ_EDC_INFO can be accessed from
6863 * BH. If previous work submission hasn't finished yet
6864 * just print whatever info is possible directly from the ISR.
6866 if (work_pending(&adev->gfx.sq_work.work)) {
6867 gfx_v8_0_parse_sq_irq(adev, ih_data);
6869 adev->gfx.sq_work.ih_data = ih_data;
6870 schedule_work(&adev->gfx.sq_work.work);
6876 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6878 .early_init = gfx_v8_0_early_init,
6879 .late_init = gfx_v8_0_late_init,
6880 .sw_init = gfx_v8_0_sw_init,
6881 .sw_fini = gfx_v8_0_sw_fini,
6882 .hw_init = gfx_v8_0_hw_init,
6883 .hw_fini = gfx_v8_0_hw_fini,
6884 .suspend = gfx_v8_0_suspend,
6885 .resume = gfx_v8_0_resume,
6886 .is_idle = gfx_v8_0_is_idle,
6887 .wait_for_idle = gfx_v8_0_wait_for_idle,
6888 .check_soft_reset = gfx_v8_0_check_soft_reset,
6889 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6890 .soft_reset = gfx_v8_0_soft_reset,
6891 .post_soft_reset = gfx_v8_0_post_soft_reset,
6892 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6893 .set_powergating_state = gfx_v8_0_set_powergating_state,
6894 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6897 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6898 .type = AMDGPU_RING_TYPE_GFX,
6900 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6901 .support_64bit_ptrs = false,
6902 .get_rptr = gfx_v8_0_ring_get_rptr,
6903 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6904 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6905 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6907 7 + /* PIPELINE_SYNC */
6908 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6909 8 + /* FENCE for VM_FLUSH */
6910 20 + /* GDS switch */
6911 4 + /* double SWITCH_BUFFER,
6912 the first COND_EXEC jump to the place just
6913 prior to this double SWITCH_BUFFER */
6921 8 + 8 + /* FENCE x2 */
6922 2, /* SWITCH_BUFFER */
6923 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6924 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6925 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6926 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6927 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6928 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6929 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6930 .test_ring = gfx_v8_0_ring_test_ring,
6931 .test_ib = gfx_v8_0_ring_test_ib,
6932 .insert_nop = amdgpu_ring_insert_nop,
6933 .pad_ib = amdgpu_ring_generic_pad_ib,
6934 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6935 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6936 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6937 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6938 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6939 .soft_recovery = gfx_v8_0_ring_soft_recovery,
6942 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6943 .type = AMDGPU_RING_TYPE_COMPUTE,
6945 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6946 .support_64bit_ptrs = false,
6947 .get_rptr = gfx_v8_0_ring_get_rptr,
6948 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6949 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6951 20 + /* gfx_v8_0_ring_emit_gds_switch */
6952 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6953 5 + /* hdp_invalidate */
6954 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6955 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6956 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6957 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6958 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6959 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6960 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6961 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6962 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6963 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6964 .test_ring = gfx_v8_0_ring_test_ring,
6965 .test_ib = gfx_v8_0_ring_test_ib,
6966 .insert_nop = amdgpu_ring_insert_nop,
6967 .pad_ib = amdgpu_ring_generic_pad_ib,
6968 .set_priority = gfx_v8_0_ring_set_priority_compute,
6969 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6972 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6973 .type = AMDGPU_RING_TYPE_KIQ,
6975 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6976 .support_64bit_ptrs = false,
6977 .get_rptr = gfx_v8_0_ring_get_rptr,
6978 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6979 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6981 20 + /* gfx_v8_0_ring_emit_gds_switch */
6982 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6983 5 + /* hdp_invalidate */
6984 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6985 17 + /* gfx_v8_0_ring_emit_vm_flush */
6986 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6987 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6988 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6989 .test_ring = gfx_v8_0_ring_test_ring,
6990 .insert_nop = amdgpu_ring_insert_nop,
6991 .pad_ib = amdgpu_ring_generic_pad_ib,
6992 .emit_rreg = gfx_v8_0_ring_emit_rreg,
6993 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6996 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7000 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7002 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7003 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7005 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7006 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7009 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7010 .set = gfx_v8_0_set_eop_interrupt_state,
7011 .process = gfx_v8_0_eop_irq,
7014 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7015 .set = gfx_v8_0_set_priv_reg_fault_state,
7016 .process = gfx_v8_0_priv_reg_irq,
7019 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7020 .set = gfx_v8_0_set_priv_inst_fault_state,
7021 .process = gfx_v8_0_priv_inst_irq,
7024 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7025 .set = gfx_v8_0_set_cp_ecc_int_state,
7026 .process = gfx_v8_0_cp_ecc_error_irq,
7029 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7030 .set = gfx_v8_0_set_sq_int_state,
7031 .process = gfx_v8_0_sq_irq,
7034 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7036 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7037 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7039 adev->gfx.priv_reg_irq.num_types = 1;
7040 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7042 adev->gfx.priv_inst_irq.num_types = 1;
7043 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7045 adev->gfx.cp_ecc_error_irq.num_types = 1;
7046 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7048 adev->gfx.sq_irq.num_types = 1;
7049 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7052 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7054 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7057 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7059 /* init asci gds info */
7060 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7061 adev->gds.gws.total_size = 64;
7062 adev->gds.oa.total_size = 16;
7064 if (adev->gds.mem.total_size == 64 * 1024) {
7065 adev->gds.mem.gfx_partition_size = 4096;
7066 adev->gds.mem.cs_partition_size = 4096;
7068 adev->gds.gws.gfx_partition_size = 4;
7069 adev->gds.gws.cs_partition_size = 4;
7071 adev->gds.oa.gfx_partition_size = 4;
7072 adev->gds.oa.cs_partition_size = 1;
7074 adev->gds.mem.gfx_partition_size = 1024;
7075 adev->gds.mem.cs_partition_size = 1024;
7077 adev->gds.gws.gfx_partition_size = 16;
7078 adev->gds.gws.cs_partition_size = 16;
7080 adev->gds.oa.gfx_partition_size = 4;
7081 adev->gds.oa.cs_partition_size = 4;
7085 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7093 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7094 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7096 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7099 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7103 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7104 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7106 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7108 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7111 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7113 int i, j, k, counter, active_cu_number = 0;
7114 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7115 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7116 unsigned disable_masks[4 * 2];
7119 memset(cu_info, 0, sizeof(*cu_info));
7121 if (adev->flags & AMD_IS_APU)
7124 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7126 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7128 mutex_lock(&adev->grbm_idx_mutex);
7129 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7130 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7134 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7136 gfx_v8_0_set_user_cu_inactive_bitmap(
7137 adev, disable_masks[i * 2 + j]);
7138 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7139 cu_info->bitmap[i][j] = bitmap;
7141 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7142 if (bitmap & mask) {
7143 if (counter < ao_cu_num)
7149 active_cu_number += counter;
7151 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7152 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7155 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7156 mutex_unlock(&adev->grbm_idx_mutex);
7158 cu_info->number = active_cu_number;
7159 cu_info->ao_cu_mask = ao_cu_mask;
7160 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7161 cu_info->max_waves_per_simd = 10;
7162 cu_info->max_scratch_slots_per_cu = 32;
7163 cu_info->wave_front_size = 64;
7164 cu_info->lds_size = 64;
7167 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7169 .type = AMD_IP_BLOCK_TYPE_GFX,
7173 .funcs = &gfx_v8_0_ip_funcs,
7176 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7178 .type = AMD_IP_BLOCK_TYPE_GFX,
7182 .funcs = &gfx_v8_0_ip_funcs,
7185 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7187 uint64_t ce_payload_addr;
7190 struct vi_ce_ib_state regular;
7191 struct vi_ce_ib_state_chained_ib chained;
7194 if (ring->adev->virt.chained_ib_support) {
7195 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7196 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7197 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7199 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7200 offsetof(struct vi_gfx_meta_data, ce_payload);
7201 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7204 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7205 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7206 WRITE_DATA_DST_SEL(8) |
7208 WRITE_DATA_CACHE_POLICY(0));
7209 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7210 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7211 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7214 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7216 uint64_t de_payload_addr, gds_addr, csa_addr;
7219 struct vi_de_ib_state regular;
7220 struct vi_de_ib_state_chained_ib chained;
7223 csa_addr = amdgpu_csa_vaddr(ring->adev);
7224 gds_addr = csa_addr + 4096;
7225 if (ring->adev->virt.chained_ib_support) {
7226 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7227 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7228 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7229 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7231 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7232 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7233 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7234 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7237 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7238 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7239 WRITE_DATA_DST_SEL(8) |
7241 WRITE_DATA_CACHE_POLICY(0));
7242 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7243 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7244 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);